From aae0563b36417f703796a7234033800aea2c5bcf Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Mon, 27 Oct 2025 14:33:49 -0700
Subject: [PATCH 01/12] Update for 3.0.1 CRAN

---
 .Rbuildignore                          |   3 +-
 DESCRIPTION                            |   3 +-
 R/classification.R                     |   2 +-
 R/detection.R                          |   3 +-
 R/install.R                            |  23 +-
 R/old/applyPredictions.R               |  33 --
 R/old/bestGuess.R                      |  84 ------
 R/old/convertCoordinates.R             |  58 ----
 R/old/detect.R                         | 306 -------------------
 R/old/extractBoxes.R                   | 318 --------------------
 R/old/generator.R                      | 397 -------------------------
 R/old/loadMDModel.R                    |  34 ---
 R/old/megadetector.R                   |  35 ---
 R/old/parseMDjson.R                    |  48 ---
 R/old/predictSpecies.R                 |  47 ---
 R/old/setupEnvironment.R               |  19 --
 R/old/zzz.R                            |  30 --
 R/reid.R                               |   4 +-
 R/zzz.R                                |   1 -
 inst/animlenv.yml                      |  74 -----
 man/classify.Rd                        |   2 +-
 man/compute_batched_distance_matrix.Rd |   4 +-
 man/load_detector.Rd                   |   3 +-
 23 files changed, 24 insertions(+), 1507 deletions(-)
 delete mode 100644 R/old/applyPredictions.R
 delete mode 100644 R/old/bestGuess.R
 delete mode 100644 R/old/convertCoordinates.R
 delete mode 100644 R/old/detect.R
 delete mode 100644 R/old/extractBoxes.R
 delete mode 100644 R/old/generator.R
 delete mode 100644 R/old/loadMDModel.R
 delete mode 100644 R/old/megadetector.R
 delete mode 100644 R/old/parseMDjson.R
 delete mode 100644 R/old/predictSpecies.R
 delete mode 100644 R/old/setupEnvironment.R
 delete mode 100644 R/old/zzz.R
 delete mode 100644 inst/animlenv.yml

diff --git a/.Rbuildignore b/.Rbuildignore
index 2f99099..288ace1 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -6,4 +6,5 @@
 ^.Rhistory
 ^.*\.Rproj$
 ^\.Rproj\.user$
-^R/old/*
\ No newline at end of file
+^R/old/*
+^.git
\ No newline at end of file

diff --git a/DESCRIPTION b/DESCRIPTION
index b8c0935..15795e2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: animl
 Title: A Collection of ML Tools for Conservation Research
-Version: 3.0.0
+Version: 3.0.1
 Authors@R: c(person(given="Kyra", family="Swanson",email="tswanson@sdzwa.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1496-3217")),
     person(given="Mathias",family="Tobler",role = "aut"))
 Description: Functions required to classify subjects within camera trap field data. The package can handle both images and videos. The authors recommend a two-step approach using Microsoft's 'MegaDetector' model and then a second model trained on the classes of interest.
@@ -12,7 +12,6 @@ Imports:
     methods,
     pbapply,
     reticulate,
-    parallel,
     stats,
 Depends:
     R (>= 4.0.0)

diff --git a/R/classification.R b/R/classification.R
index 9b279d4..7db4e22 100644
--- a/R/classification.R
+++ b/R/classification.R
@@ -47,7 +47,7 @@ save_classifier <- function(model, out_dir, epoch, stats, optimizer=NULL, schedu
 #' @examples
 #' \dontrun{classes <- load_class_list('andes_classes.csv')}
 load_class_list <- function(classlist_file){
-  read.csv(classlist_file)
+  utils::read.csv(classlist_file)
 }
 

diff --git a/R/detection.R b/R/detection.R
index a0e7be0..5b505c9 100644
--- a/R/detection.R
+++ b/R/detection.R
@@ -8,7 +8,8 @@
 #' @export
 #'
 #' @examples
-#' \dontrun{md_py <- megadetector("/mnt/machinelearning/megaDetector/md_v5a.0.0.pt", model_type='mdv5', device = 'cuda:0')}
+#' \dontrun{md_py <- load_detector("/mnt/machinelearning/megaDetector/md_v5a.0.0.pt",
+#' model_type='mdv5', device='cuda:0')}
 load_detector <- function(model_path, model_type, device=NULL){
   # first check if animl-py is loaded
   animl_py$load_detector(model_path, model_type=model_type, device=device)

diff --git a/R/install.R b/R/install.R
index 43ed489..4bfa8cf 100644
--- a/R/install.R
+++ b/R/install.R
@@ -1,5 +1,5 @@
 # VARIABLE FOR VERSION
-ANIML_VERSION <- "3.0.0"
+ANIML_VERSION <- "3.0.1"
 animl_py <- NULL
 
 #' Create a miniconda environment for animl and install animl-py
@@ -19,24 +19,24 @@ animl_install <- function(py_env = "animl_env",
                           python_version = "3.12",
                           confirm=TRUE) {
   # 1. Load environment if exists
-  message(sprintf("1. Loading Python Environment (%s)...", py_env))
+  packageStartupMessage(sprintf("1. Loading Python Environment (%s)...", py_env))
   try_error <- try(reticulate::use_condaenv(py_env, required = TRUE), silent=TRUE)
 
   # 2. Install if not exists
   if (inherits(try_error, "try-error")) {
-    message(sprintf("%s not found \n", py_env))
+    packageStartupMessage(sprintf("%s not found \n", py_env))
     # 2. Create new environment
-    message("\n", sprintf("2. Creating a Python Environment (%s)", py_env))
+    packageStartupMessage("\n", sprintf("2. Creating a Python Environment (%s)", py_env))
     animl_path <- tryCatch(expr = create_pyenv(python_version = python_version, py_env = py_env),
                            error = function(e) stop(e, "An error occurred while animl_install was creating the Python Environment.",
                                                     "Check that you've accepted the conda TOS and restart the R session before trying again."))
     #print(animl_path)
 
     # 3. Install animl-py
-    message("\n3. Installing animl-py...")
-    package = sprintf("animl==%s", animl_version)
+    packageStartupMessage("\n3. Installing animl-py...")
+    package <- sprintf("animl==%s", animl_version)
     reticulate::py_install(package, envname=py_env, pip=TRUE)
-    message("animl successfully installed. Restart R session to see changes.\n")
+    packageStartupMessage("animl successfully installed. Restart R session to see changes.\n")
     invisible(TRUE)
     return(FALSE)
   }
@@ -60,17 +60,12 @@
   }
   else{
     stop('animl_env environment must be loaded first via reticulate')
   }
-  message("animl-py loaded successfully.")
+  packageStartupMessage("animl-py loaded successfully.")
   return(animl_py)
 }
 
-
-animl_update <- function(){
-
-
-}
-
 
 #' Check that the python version is compatible with the current version of animl-py
 #'
@@ -92,7 +87,7 @@ check_python <- function(initialize = TRUE) {
   if (utils::compareVersion(as.character(py_version), "3.9") == -1) {
     stop("animl needs Python >=3.9")
   }
-  message(sprintf("Python version %s compatible with animl.", py_version))
+  packageStartupMessage(sprintf("Python version %s compatible with animl.", py_version))
 }
 

diff --git a/R/old/applyPredictions.R b/R/old/applyPredictions.R
deleted file mode 100644
index f7f6397..0000000
--- a/R/old/applyPredictions.R
+++ /dev/null
@@ -1,33 +0,0 @@
-#' Apply Classifier Predictions and Merge DataFrames
-#'
-#' @param animals Set of animal crops/images
-#' @param pred Classifier predictions for animal crops/images
-#' @param classfile .txt file containing common names for species classes
-#' @param outfile File to which results are saved
-#' @param counts Returns a table of all predictions, defaults to FALSE
-#'
-#' @return fully merged dataframe with Species predictions and confidence weighted by MD conf
-#' @importFrom methods is
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' alldata <- applyPredictions(animals,empty,classfile,pred,counts = FALSE)
-#' }
-applyPredictions <- function(animals, pred, classfile, outfile = NULL, counts = FALSE) {
-  if (checkFile(outfile)) { return(loadData(outfile))}
-  if (!is(animals, "data.frame")) { stop("'animals' must be DataFrame.")}
-  if (!file.exists(classfile)) { stop("The given class file does not exist.")}
-
-  classes <- utils::read.table(classfile, stringsAsFactors = F)$x
-
-  animals$prediction <- classes[apply(pred, 1, which.max)]
-  animals$confidence <- apply(pred, 1, max) * animals$conf
-
-  if (counts) { table(classes[apply(pred, 1, which.max)])}
-
-  # save data
-  if(!is.null(outfile)) { saveData(animals, outfile)}
-
-  animals
-}
\ No newline at end of file

diff --git a/R/old/bestGuess.R b/R/old/bestGuess.R
deleted file mode 100644
index 867bf8f..0000000
--- a/R/old/bestGuess.R
+++ /dev/null
@@ -1,84 +0,0 @@
-#' Select Best Classification From Multiple Frames
-#'
-#' @param manifest dataframe of all frames including species classification
-#' @param sort method for selecting best prediction, defaults to most frequent
-#' @param count if true, return column with number of MD crops for that animal (does not work for images)
-#' @param shrink if true, return a reduced dataframe with one row per image
-#' @param outfile file path to which the data frame should be saved
-#' @param prompt if true, prompts the user to confirm overwrite
-#' @param parallel Toggle for parallel processing, defaults to FALSE
-#' @param workers number of processors to use if parallel, defaults to 1
-#'
-#' @return dataframe with new prediction in "Species" column
-#' @import dplyr
-#' @importFrom methods is
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' mdmanifest <- bestGuess(manifest, sort = "conf")
-#' }
-bestGuess <- function(manifest, sort = "count", count = FALSE, shrink = FALSE,
-                      outfile = NULL, prompt = TRUE, parallel = FALSE, workers = 1) {
-
-  if (checkFile(outfile)) { return(loadData(outfile))}
-  if (!is(manifest, "data.frame")) { stop("'manifest' must be DataFrame")}
-
-
videonames <- unique(manifest$FilePath) - steps <- length(videonames) - - run.parallel <- function(i){ - sequence <- manifest[manifest$FilePath == videonames[i], ] - - guesses <- sequence %>% dplyr::group_by(prediction) %>% dplyr::summarise(confidence = max(confidence), n = dplyr::n()) - - #most confident - if (sort == "conf") { - guess <- guesses[which.max(guesses$confidence), ] - if (guess$prediction == "empty" && nrow(guesses) > 1) { - guesses <- guesses[guesses$prediction != "empty", ] - guess <- guesses[which.max(guesses$confidence), ] - } - } - #most frequent unless empty - else if (sort == "count") { - guesses <- guesses[order(guesses$confidence, decreasing=TRUE),] - best <- which.max(guesses$n) - guess <- guesses[best, ] - if (guess$prediction == "empty" && nrow(guesses) > 1) { - guesses <- guesses[guesses$prediction != "empty", ] - guess <- guesses[which.max(guesses$n), ] - } - } - else { stop("Must select guess by 'conf' (confidence) or by 'count' (frequency)") } - #print(guess) - sequence$prediction <- guess$prediction - sequence$confidence <- guess$confidence - - if (count) { sequence$count <- guess$n } - # one entry per image/video - if(shrink){ sequence = sequence[!duplicated(sequence$FilePath),] } - - sequence - } - - if (parallel) { - cl <- parallel::makeCluster(min(parallel::detectCores(), workers), type = "PSOCK") - parallel::clusterExport(cl, list("sort", "count", '%>%'), envir = environment()) - parallel::clusterSetRNGStream(cl) - - results <- pbapply::pblapply(1:steps, function(x) { run.parallel(x) }, cl = cl) - parallel::stopCluster(cl) - } - else { - results <- pbapply::pblapply(1:steps, function(x) { run.parallel(x) }) - } - - results <- do.call(rbind, results) - - if(!is.null(outfile)){saveData(results, outfile, prompt)} - - results -} - -utils::globalVariables(c("prediction", "confidence")) \ No newline at end of file diff --git a/R/old/convertCoordinates.R b/R/old/convertCoordinates.R deleted file mode 100644 index 7d568bb..0000000 --- a/R/old/convertCoordinates.R +++ /dev/null @@ -1,58 +0,0 @@ -#' Convert bbox from Relative to Absolute Coordinates -#' -#' Each row is a MD bounding box, there can be multiple bounding boxes per image. 
-#' -#' @param results list of bounding boxes for each image -#' -#' @return A dataframe with one entry for each bounding box -#' @export -#' -#' @examples -#' \dontrun{ -#' images<-read_exif(imagedir,tags=c("filename","directory","DateTimeOriginal","FileModifyDate"), -#' recursive = TRUE) -#' colnames(images)[1]<-"FilePath" -#' mdsession<-loadMDModel(mdmodel) -#' mdres<-classifyImagesBatchMD(mdsession,images$FilePath, -#' resultsfile=resultsfile,checkpoint = 2500) -#' mdresflat<-convertresults(mdres) -#' } -convertCoordinates <- function(results){ - - images <- data.frame(image_path=character(),md_class=numeric(),md_confidence=numeric(), - pixelx=numeric(),pixely=numeric(), - x1=numeric(),x2=numeric(),y1=numeric(),y2=numeric(), - xmin=numeric(),xmax=numeric(),ymin=numeric(),ymax=numeric()) - - - for (i in 1:length(results)) { - #load image - jpg<-jpeg::readJPEG(results[[i]]$file) - jpgy<-dim(jpg)[1] - jpgx<-dim(jpg)[2] - - xmin<-max(0, round(results[[i]]$bbox1 * jpgx, 0)) - xmax<-min(jpgx,round((results[[i]]$bbox1 + results[[i]]$bbox3)*jpgx, 0)) - ymin<-max(0, round(results[[i]]$bbox2, 0)) - ymax<-min(jpgy, round((results[[i]]$bbox2 + results[[i]]$bbox4), 0)) - - xminb<-max(0, round(results[[i]]$bbox1*jpgx, 0)) - xmaxb<-min(jpgx, round((results[[i]]$bbox1+results[[i]]$bbox3)*jpgx, 0)) - yminb<-max(0, round(results[[i]]$bbox2*jpgy, 0)) - ymaxb<-min(jpgy, round((results[[i]]$bbox2+results[[i]]$bbox4)*jpgy, 0)) - - if (length(dim(jpg)) == 2) dim(jpg) <- c(dim(jpg)[1], dim(jpg)[2],1) - - line <- data.frame(image_path = results[[i]]$file, - md_class = results[[i]]$category, - md_confidence = results[[i]]$conf, - pixelx = jpgx, pixely = jpgy, - x1 = results[[i]]$bbox1, x2 = results[[i]]$bbox2, - y1 = results[[i]]$bbox3, y2 = results[[i]]$bbox4, - xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax) - images<-rbind(images,line) - } - images -} - - diff --git a/R/old/detect.R b/R/old/detect.R deleted file mode 100644 index 95ecdf3..0000000 --- a/R/old/detect.R +++ /dev/null @@ -1,306 +0,0 @@ -#' Run MD on a Single Image -#' -#' Returns the MD bounding boxes, classes, confidence above the min_conf -#' threshold for a single image. #' Requires a an mdsession is already -#' loaded (see loadMDModel() ) and the file path of the image in question. 
-#' -#' -#' @param mdsession Should be the output from loadMDmodel(model) -#' @param imagefile The path for the image in question -#' @param mdversion MegaDetector version, defaults to 5 -#' @param min_conf Confidence threshold for returning bounding boxes, defaults to 0.1 -#' -#' @return a list of MD bounding boxes, classes, and confidence for the image -#' @import tensorflow -#' @export -#' -#' @examples -#' \dontrun{ -#' images <- read_exif(imagedir, -#' tags = c("filename", "directory", "FileModifyDate"), -#' recursive = TRUE) -#' colnames(images)[1] <- "FilePath" -#' mdsession <- loadMDModel(mdmodel) -#' mdres <- classifyImageMD(mdsession, images$FilePath[1]) -#' } -detectObject <- function(mdsession, imagefile, mdversion=5 , min_conf = 0.1) { - if ("mdsession" %in% class(mdsession)) { type <- "mdsession" } - else if ("mdmodel" %in% class(mdsession)) { type <- "mdmodel" } - else { stop("Expecting a mdsession or mdmodel object.") } - - if (!file.exists(imagefile)) {stop("Image files does not exist.")} - - np <- reticulate::import("numpy") - - if (mdversion<=4) { - img <- loadImage(imagefile, FALSE) - # get tensors - image_tensor <- mdsession$graph$get_tensor_by_name("image_tensor:0") - box_tensor <- mdsession$graph$get_tensor_by_name("detection_boxes:0") - score_tensor <- mdsession$graph$get_tensor_by_name("detection_scores:0") - class_tensor <- mdsession$graph$get_tensor_by_name("detection_classes:0") - - res <- mdsession$run(list(box_tensor, score_tensor, class_tensor), - feed_dict = list("image_tensor:0" = np$expand_dims(img, axis = F))) - resfilter <- which(res[[2]] >= min_conf) - - list( - file = imagefile, - max_detection_conf = max(res[[2]]), - detections = data.frame( - category = res[[3]][resfilter], conf = res[[2]][resfilter], - bbox1 = res[[1]][1, resfilter, 2], bbox2 = res[[1]][1, resfilter, 1], - bbox3 = res[[1]][1, resfilter, 4] - res[[1]][1, resfilter, 2], - bbox4 = res[[1]][1, resfilter, 3] - res[[1]][1, resfilter, 1] - ) - ) - } - else{ #MDv5 - img <- loadImageResizeSize(imagefile,height=1280,width=1280,pad=TRUE,standardize=TRUE) - # get tensors - if(type=="mdsession"){ - image_tensor=mdsession$graph$get_tensor_by_name('x:0') - output_tensor = mdsession$graph$get_tensor_by_name('Identity:0') - } - else{ infer <- mdsession$signatures["serving_default"] } - - if(type=="mdsession"){ - res<-mdsession$run(list(output_tensor),feed_dict=list("x:0"=tf$reshape(img[[1]],as.integer(c(1,1280,1280,3)))$numpy())) - res<-tf$cast(res[[1]],tf$float32) - } - else{ - res<-infer(tf$reshape(img[[1]],as.integer(c(1,1280,1280,3)))) - res<-res[[1]] - } - - - scores<-(as.array(res[,,6:8],drop=F)*as.array(res)[,,c(5,5,5),drop=F]) - resfilter<-tensorflow::tf$image$combined_non_max_suppression(tf$reshape(res[,,1:4],as.integer(c(dim(res)[1],dim(res)[2],1,4))),scores,max_output_size_per_class=as.integer(100), - max_total_size=as.integer(100),score_threshold=min_conf,clip_boxes=TRUE) - #images[(i * batch - batch)+1] - lapply(1:length(resfilter$valid_detections),processYOLO5,resfilter$nmsed_boxes, - resfilter$nmsed_classes,resfilter$nmsed_scores,resfilter$valid_detections,img)[[1]] - - } -} - - - -#' Run MegaDetector on a batch of images -#' -#' Runs MD on a list of image filepaths. 
-#' Can resume for a results file and will checkpoint the results after a set -#' number of images -#' -#' @param mdsession should be the output from loadMDmodel(model) -#' @param images list of image filepaths -#' @param mdversion select MegaDetector version, defaults to 5 -#' @param min_conf Confidence threshold for returning bounding boxes, defaults to 0.1 -#' @param batch Process images in batches, defaults to 1 -#' @param outfile File containing previously checkpointed results -#' @param checkpoint Bank results after processing a number of images, defaults to 5000 -#' -#' @return a list of lists of bounding boxes for each image -#' @import tensorflow -#' @importFrom methods is -#' @export -#' -#' @examples -#' \dontrun{ -#' images <- read_exif(imagedir, -#' tags = c("filename", "directory", "DateTimeOriginal", "FileModifyDate"), -#' recursive = TRUE) -#' colnames(images)[1] <- "FilePath" -#' mdsession <- loadMDModel(mdmodel) -#' mdres <- classifyImagesBatchMD(mdsession, images$FilePath, -#' outfile = mdoutfile, checkpoint = 2500) -#' } -detectObjectBatch <- function(mdsession, images, mdversion = 5, min_conf = 0.1, batch = 1, outfile = NULL, checkpoint = 5000) { - if ("mdsession" %in% class(mdsession)) { type <- "mdsession" } - else if ("mdmodel" %in% class(mdsession)) { type <- "mdmodel" } - else { stop("Expecting a mdsession or mdmodel object.") } - - - if (!is.null(outfile)) { - if (!dir.exists(dirname(outfile))) { stop("Results file directory does not exist.\n") } - if (tolower(substr(outfile, nchar(outfile) - 5, nchar(outfile))) != ".rdata") { - outfile <- paste0(outfile, ".RData") - } - - # if results file exists prompt user to load it and resume - if (!is.null(outfile) & file.exists(outfile)) { - if (tolower(readline(prompt = "Results file exists, would you like to resume? 
y/n: ")) == "y") { - load(outfile) - cat(length(results), "records loaded.\n") - if (length(results) == length(images)){ return(results) } - images <- images[!(images %in% sapply(results, function(x) x$file))] - } - else { results <- list() } - } - else { results <- list() } - } - else { results <- list() } - - if(mdversion <= 4){ - # create data generator - dataset <- ImageGenerator(images, standardize = FALSE, batch = batch) - - # get tensors - image_tensor <- mdsession$graph$get_tensor_by_name("image_tensor:0") - box_tensor <- mdsession$graph$get_tensor_by_name("detection_boxes:0") - score_tensor <- mdsession$graph$get_tensor_by_name("detection_scores:0") - class_tensor <- mdsession$graph$get_tensor_by_name("detection_classes:0") - - steps <- ceiling(length(images) / batch) - opb <- pbapply::pboptions(char = "=") - pb <- pbapply::startpb(1, steps) - starttime<-Sys.time() - # process all images - for (i in 1:steps) { - # catch errors due to empty or corrupted images - if (!inherits(try(img <- dataset$get_next(), silent = T), "try-error")) { - res <- mdsession$run(list(box_tensor, score_tensor, class_tensor), feed_dict = list("image_tensor:0" = img$numpy())) - for (l in 1:dim(res[[1]])[1]) { - resfilter <- which(res[[2]] >= min_conf) - results[[length(results) + 1]] <- list( - file = images[(i * batch - batch) + l], - max_detection_conf = max(res[[2]][l, ]), - detections = data.frame( - category = res[[3]][l, resfilter], conf = res[[2]][l, resfilter], - bbox1 = res[[1]][l, resfilter, 2], - bbox2 = res[[1]][l, resfilter, 1], - bbox3 = res[[1]][l, resfilter, 4] - res[[1]][l, resfilter, 2], - bbox4 = res[[1]][l, resfilter, 3] - res[[1]][l, resfilter, 1] - ) - ) - } - } - # save intermediate results at given checkpoint interval - if (!is.null(outfile) & (i %% checkpoint / batch) == 0) { - save(results, file = outfile) - } - pbapply::setpb(pb, i) - } - pbapply::setpb(pb, steps) - pbapply::closepb(pb) - } - else { #MDv5 - dataset <- ImageGeneratorSize(images, resize_height=1280, resize_width=1280, - pad = TRUE, standardize = TRUE, batch = batch) - - if (type=="mdsession") { - # get tensors - image_tensor = mdsession$graph$get_tensor_by_name('x:0') - output_tensor = mdsession$graph$get_tensor_by_name('Identity:0') - } - else{ - infer<-mdsession$signatures["serving_default"] - } - - steps <- ceiling(length(images) / batch) - opb <- pbapply::pboptions(char = "=") - pb <- pbapply::startpb(1, steps) - starttime<-Sys.time() - # process all images - for (i in 1:steps) { - # catch errors due to empty or corrupted images - if (!inherits(try(img <- dataset$get_next(), silent = T), "try-error")) { - if(type=="mdsession"){ - resbatch<-mdsession$run(list(output_tensor),feed_dict=list("x:0"=img[[1]]$numpy())) - resbatch<-tf$cast(resbatch[[1]],tf$float32) - } - else{ - resbatch<-infer(img[[1]]) - resbatch<-resbatch[[1]] - } - - scores<-(as.array(resbatch[,,6:8],drop=F)*as.array(resbatch)[,,c(5,5,5),drop=F]) - resfilter<-tensorflow::tf$image$combined_non_max_suppression(tf$reshape(resbatch[,,1:4],as.integer(c(dim(resbatch)[1],dim(resbatch)[2],1,4))), - scores,max_output_size_per_class=as.integer(100), - max_total_size=as.integer(100),score_threshold=min_conf,clip_boxes=TRUE) - #images[(i * batch - batch)+1] - results<-c(results,lapply(1:length(resfilter$valid_detections),processYOLO5,resfilter$nmsed_boxes, - resfilter$nmsed_classes,resfilter$nmsed_scores,resfilter$valid_detections,img)) - - } - # save intermediate results at given checkpoint interval - if (!is.null(outfile) & (i %% checkpoint / batch) == 0) { 
- save(results, file = outfile) - } - pbapply::setpb(pb, i) - } - pbapply::setpb(pb, steps) - pbapply::closepb(pb) - } - if (!is.null(outfile)) { save(results, file = outfile) } - - cat(paste(length(images),"images processed.",round(length(images)/(as.numeric(Sys.time())-as.numeric(starttime)),1),"images/s\n")) - results -} - -#' Process YOLO5 output and convert to MD format -#' -#' Returns a list with the standard MD output format. Used for batch processing -#' -#' -#' @param n index for the record in the batch -#' @param boxes array of boxes returned by combined_non_max_suppression -#' @param classes vector of classes returned by combined_non_max_suppression -#' @param scores vector of probabilities returned by combined_non_max_suppression -#' @param selection vector of number of detected boxes returned by combined_non_max_suppression -#' @param batch batch used to detect objects -#' -#' @return a list of MD bounding boxes, classes, and confidence for the image -#' @export -#' -processYOLO5 <- function(n, boxes, classes, scores, selection, batch){ - boxes<-as.array(boxes) - classes<-as.array(classes) - scores<-as.array(scores) - - img_width<-batch[[2]]$numpy()[n] - img_height<-batch[[3]]$numpy()[n] - - a1<-as.vector((img_height-img_width)/img_height) - a2<-as.vector((img_width-img_height)/img_width) - - if (as.vector(selection)[n]>0) { - filter<-c(1:(as.vector(selection)[n])) - - if (img_width>img_height) { - list(file = as.character(batch[[4]][n]), - max_detection_conf = max(scores[n,filter],0), - detections = data.frame( - category = classes[n,filter]+1, conf = scores[n,filter], - bbox1 = pmin(pmax(boxes[n,filter,1]-boxes[n,filter,3]/2,0),1), - bbox2 = pmin(pmax(((boxes[n,filter,2]-boxes[n,filter,4]/2)-a2/2)/(1-a2),0),1), - bbox3 = pmin(pmax(boxes[n,filter,3],0.01),1), - bbox4 = pmin(pmax(boxes[n,filter,4]/(1-a2),0.01),1) - ) - ) - } - else{ - list(file = as.character(batch[[4]][n]), - max_detection_conf = max(scores[n,filter],0), - detections = data.frame( - category = classes[n,filter]+1, conf = scores[n,filter], - bbox1 = pmin(pmax(((boxes[n,filter,1]-boxes[n,filter,3]/2)-a1/2)/(1-a1),0),1), - bbox2 = pmin(pmax((boxes[n,filter,2]-boxes[n,filter,4]/2),0),1), - bbox3 = pmin(pmax(boxes[n,filter,3]/(1-a1),0.01),1), - bbox4 = pmin(pmax(boxes[n,filter,4],0.01),1) - ) - ) - } - } - else{ - list(file = as.character(batch[[4]][n]), - max_detection_conf = max(scores[n,],0), - detections = data.frame( - category = integer(), conf = numeric(), - bbox1 = numeric(), - bbox2 = numeric(), - bbox3 = numeric(), - bbox4 = numeric()) - ) - } -} diff --git a/R/old/extractBoxes.R b/R/old/extractBoxes.R deleted file mode 100644 index 067642d..0000000 --- a/R/old/extractBoxes.R +++ /dev/null @@ -1,318 +0,0 @@ -# This functions are defunct/extraneous and need maintenance to bring in line -# with the rest of the package. 
- - -#' Extract bounding boxes for a single image and save as new images -#' -#' Requires the unflattened raw MD output -#' -#' @param image single image, raw MD output format (list) -#' @param min_conf Confidence threshold (defaults to 0, not in use) -#' @param buffer Adds a buffer to the MD bounding box, defaults to 2px -#' @param return.crops Toggle to return list of cropped images, defaults to FALSE -#' @param save Toggle to save output cropped, defaults to FALSE -#' @param resize Size in pixels to resize cropped images, NA if images are not resized, defaults to NA -#' @param outdir Directory in which output cropped images will be saved -#' @param quality Compression level of output cropped image, defaults to 0.8 -#' -#' @return a flattened data.frame containing crop information -#' @export -#' @details A variable crop_rel_path in the image list can be used to change the path where the crops will be stored. -#' @details The final output path will be the outdir plus the crop_rel_path. -#' -#' @examples -#' \dontrun{ -#' images <- read_exif(imagedir, tags = c("filename","directory"), recursive = TRUE) -#' crops <- extractBoxesFromMD(images[1, ], return.crops = TRUE, save = TRUE) -#' } -extractBoxesFromMD <- function(image, min_conf = 0, buffer = 0, return.crops = FALSE, save = FALSE, resize = NA, outdir = "", quality = 0.8) { - if (save & !dir.exists(outdir)) { - stop("Output directory invalid.\n") - } - images_flat <- data.frame( - FilePath = character(), md_class = numeric(), md_confidence = numeric(), pixelx = numeric(), pixely = numeric(), - x1 = numeric(), x2 = numeric(), y1 = numeric(), y2 = numeric(), - xmin = numeric(), xmax = numeric(), ymin = numeric(), ymax = numeric(), crop_path = character(), stringsAsFactors = FALSE - ) - - # load image - jpg <- jpeg::readJPEG(image$FilePath) - jpgy <- dim(jpg)[1] - jpgx <- dim(jpg)[2] - - # get bounding boxes - # get bounding boxes - if (is.data.frame(image$detections) && nrow(image$detections)>0) { - s <- image$detections - - # extract bounding box - if (return.crops) { - crops <- list() - } - - c <- 1 - for (j in 1:nrow(s)) { - xmin <- max(0, round(s[j, ]$bbox1 * jpgx, 0)) - xmax <- min(jpgx, round(s[j, ]$bbox1 * jpgx + max(1, s[j, ]$bbox3 * jpgx), 0)) - ymin <- max(0, round(s[j, ]$bbox2 * jpgy, 0)) - ymax <- min(jpgy, round(s[j, ]$bbox2 * jpgy + max(1, s[j, ]$bbox4 * jpgy), 0)) - buffer2 <- max(xmax - xmin, ymax - ymin) * buffer - - xminb <- max(0, xmin - buffer2) - xmaxb <- min(jpgx, xmax + buffer2) - yminb <- max(0, ymin - buffer2) - ymaxb <- min(jpgy, ymax + buffer2) - - if (length(dim(jpg)) == 2) { - dim(jpg) <- c(dim(jpg)[1], dim(jpg)[2], 1) - } - - crop <- jpg[yminb:ymaxb, xminb:xmaxb, ] - - # resize and pad if requested - if (!is.na(resize)) { - crop <- resizePad(crop, resize) - } - - # if we return crops save crop in list - if (return.crops) { - crops[[c]] <- crop - } - - # save image if requested - imgname <- "" - if (save) { - if(!is.null(image$crop_rel_path)){ - imgname <- paste0(outdir, image$crop_rel_path,basename(image$FilePath)) - }else{ - imgname <- paste0(outdir, basename(image$FilePath)) - } - imgbase <- strsplit(basename(imgname), "[.]")[[1]][1] - imgext <- strsplit(basename(imgname), "[.]")[[1]][2] - if (nrow(s) > 1) { - imgname <- paste0(dirname(imgname), "/", imgbase, "_c", j, ".", imgext) - } - i=j - while(file.exists(imgname)){ - imgname <- paste0(dirname(imgname), "/", imgbase, "_c", i, ".", imgext) - i=i+1 - } - if (!dir.exists(dirname(imgname))) { - dir.create(dirname(imgname), recursive = TRUE) - } - - 
jpeg::writeJPEG(crop, imgname, quality = quality) - } - line <- data.frame( - FilePath = image$FilePath, md_class = as.numeric(s[j, ]$category), md_confidence = s[j, ]$conf, pixelx = jpgx, pixely = jpgy, - x1 = s[j, ]$bbox1, x2 = s[j, ]$bbox2, y1 = s[j, ]$bbox3, y2 = s[j, ]$bbox4, - xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, crop_path = imgname, stringsAsFactors = FALSE - ) - images_flat <- rbind(images_flat, line) - } - } else { - line <- data.frame( - FilePath = image$FilePath, md_class = 0, md_confidence = image$max_detection_conf, pixelx = jpgx, pixely = jpgy, - x1 = NA, x2 = NA, y1 = NA, y2 = NA, - xmin = NA, xmax = NA, ymin = NA, ymax = NA, crop_path = "", stringsAsFactors = FALSE - ) - images_flat <- rbind(images_flat, line) - } - if (return.crops) { - list(crops = crops, data = images_flat) - } else { - images_flat - } -} - - -#' Extract crops from a single image represented by a processed dataframe -#' -#' @param image dataframe containing MD output (assumes single row) -#' @param min_conf Confidence threshold (defaults to 0, not in use) -#' @param buffer Adds a buffer to the MD bounding box, defaults to 2px -#' @param save Toggle to save output cropped, defaults to FALSE -#' @param resize Size in pixels to resize cropped images, NA if images are not resized, defaults to NA -#' @param outdir Directory in which output cropped images will be saved -#' @param quality Compression level of output cropped image, defaults to 0.8 -#' -#' @return A dataframe containing image and crop paths -#' @details A variable crop_rel_path in the image list can be used to change the path where the crops will be stored. -#' @details The final output path will be the outdir plus the crop_rel_path. -#' @export -#' -#' @examples -#' \dontrun{ -#' crops <- extractBoxesFromFlat(mdresflat[1, ], save = TRUE, out) -#' } -extractBoxesFromFlat <- function(image, min_conf = 0, buffer = 0, save = TRUE, resize = NA, outdir = "", quality = 0.8) { - if (save & !dir.exists(outdir)) { - stop("Output directory invalid.\n") - } - - if(sum(is.na(image[,c("bbox1","bbox2","bbox3","bbox4")]))==0 & image$max_conf>=min_conf){ - - # load image - jpg <- jpeg::readJPEG(image$Frame) - jpgy <- dim(jpg)[1] - jpgx <- dim(jpg)[2] - - # get bounding boxes - # get bounding boxes - xmin <- max(0, round(image$bbox1 * jpgx, 0)) - xmax <- min(jpgx, round(image$bbox1 * jpgx + max(1, image$bbox3 * jpgx), 0)) - ymin <- max(0, round(image$bbox2 * jpgy, 0)) - ymax <- min(jpgy, round(image$bbox2 * jpgy + max(1, image$bbox4 * jpgy), 0)) - buffer2 <- max(xmax - xmin, ymax - ymin) * buffer - - xminb <- max(0, xmin - buffer2) - xmaxb <- min(jpgx, xmax + buffer2) - yminb <- max(0, ymin - buffer2) - ymaxb <- min(jpgy, ymax + buffer2) - if (length(dim(jpg)) == 2) { - dim(jpg) <- c(dim(jpg)[1], dim(jpg)[2], 1) - } - - crop <- jpg[yminb:ymaxb, xminb:xmaxb, ] - - # resize and pad if requested - if (!is.na(resize)) { - crop <- resizePad(crop, resize) - } - - # save image if requested - if (save) { - if(!is.null(image$crop_rel_path)){ - imgname <- paste0(outdir, image$crop_rel_path,basename(image$Frame)) - }else{ - imgname <- paste0(outdir, basename(image$Frame)) - } - imgbase <- strsplit(basename(imgname), "[.]")[[1]][1] - imgext <- strsplit(basename(imgname), "[.]")[[1]][2] - j=1 - while(file.exists(imgname)){ - imgname <- paste0(dirname(imgname), "/", imgbase, "_c", j, ".", imgext) - j=j+1 - } - if (!dir.exists(dirname(imgname))) { - dir.create(dirname(imgname), recursive = T) - } - - jpeg::writeJPEG(crop, imgname, quality = quality) - } - 
data.frame(image,crop_path=imgname) - } -} - - -#' Extract bounding boxes and save as new image from a batch of images -#' -#' @param images list of MD output or flat data.frame -#' @param min_conf Confidence threshold (defaults to 0, not in use) -#' @param buffer Adds a buffer to the MD bounding box, defaults to 2px -#' @param save Toggle to save output cropped, defaults to FALSE -#' @param resize Size in pixels to resize cropped images, NA if images are not resized, defaults to NA -#' @param outdir Directory in which output cropped images will be saved -#' @param quality Compression level of output cropped image, defaults to 0.8 -#' @param parallel Toggle to enable parallel processing, defaults to FALSE -#' @param nproc Number of workers if parallel = TRUE, defaults to output of detectCores() -#' -#' @return a flattened dataframe containing crop information -#' @details A variable crop_rel_path in the image list or data.frame can be used to change the path where the crops will be stored. -#' @details The final output path will be the outdir plus the crop_rel_path. -#' @export -#' -#' @examples -#' \dontrun{ -#' images <- read_exif(imagedir, tags = c("filename", "directory"), recursive = TRUE) -#' crops <- extractAllBoxes(images,save=TRUE,out) -#' } -extractBoxes <- function(images, min_conf = 0, buffer = 0, save = FALSE, resize = NA, outdir = "", quality = 0.8, parallel = FALSE, nproc = parallel::detectCores()) { - if (outdir != "" & !dir.exists(outdir)) { - if (!dir.create(outdir, recursive = TRUE)) { - stop("Output directory invalid.\n") - } - } - if(!(inherits(images,"list") | inherits(images,"data.frame"))){ - stop("images needs to be a list of MD results or a data.frame.\n") - } - if(inherits(images,"list")){ - # define processing function - run.parallel <- function(i) { - if (file.exists(images[[i]]$FilePath)) { - extractBoxesFromMD(images[[i]], min_conf = min_conf, buffer = buffer, resize = resize, save = save, outdir = outdir, quality = quality) - } - else { NA } - } - opb <- pbapply::pboptions(char = "=") - if (parallel) { - type <- "PSOCK" - - cl <- parallel::makeCluster(min(parallel::detectCores(), nproc), type = type) - parallel::clusterExport(cl, list("buffer", "resize", "quality", "outdir", "images", "extractBoxesFromMD", "resizePad"), envir = environment()) - # set random number generator for cluster - parallel::clusterSetRNGStream(cl) - - results <- pbapply::pblapply(1:length(images), function(x) { - run.parallel(x) - }, cl = cl) - parallel::stopCluster(cl) - } else { - results <- pbapply::pblapply(1:length(images), function(x) { - run.parallel(x) - }) - } - results <- do.call(rbind, results) - results - }else if(inherits(images,"data.frame")){ - # define processing function - run.parallel <- function(i) { - if (file.exists(images[i, ]$Frame)) { - extractBoxesFromFlat(images[i, ], min_conf = min_conf, buffer = buffer, resize = resize, save = save, outdir = outdir, quality = quality) - } else { - NA - } - } - - opb <- pbapply::pboptions(char = "=") - if (parallel) { - type <- "PSOCK" - cl <- parallel::makeCluster(min(parallel::detectCores(), nproc), type = type) - parallel::clusterExport(cl, list("buffer", "resize", "quality", "outdir", "images", "extractBoxesFromFlat", "resizePad"), envir = environment()) - # set random number generator for cluster - parallel::clusterSetRNGStream(cl) - - results <- pbapply::pblapply(1:nrow(images), function(x) { run.parallel(x) }, cl = cl) - parallel::stopCluster(cl) - } else { - results <- pbapply::pblapply(1:nrow(images), function(x) { 
run.parallel(x) }) - } - results <- do.call(rbind, results) - results - } -} - -#' Resize an image with padding -#' -#' @param img the image, read by jpeg library -#' @param size new size -#' -#' @return returns resized jpeg image -#' @export -#' -#' @examples -#' \dontrun{ -#' crop <- resizePad(cropped_image_path,256) -#' } -resizePad <- function(img, size = 256) { - if (dim(img)[0] == 0 || dim(img)[1] == 0) { - return(img) - } - imgpad <- array(0, c(max(dim(img)), max(dim(img)), 3)) - xstart <- max(1, floor((dim(imgpad)[2] - dim(img)[2]) / 2)) - ystart <- max(1, floor((dim(imgpad)[1] - dim(img)[1]) / 2)) - imgpad[ystart:(ystart + dim(img)[1] - 1), xstart:(xstart + dim(img)[2] - 1), ] <- img - imgres <- imager::resize(imager::as.cimg(imgpad), size_x = size, size_y = size, interpolation_type = 3) - imgres[, , 1, ] -} - diff --git a/R/old/generator.R b/R/old/generator.R deleted file mode 100644 index 73b268e..0000000 --- a/R/old/generator.R +++ /dev/null @@ -1,397 +0,0 @@ -#' @title Tensorflow data generator that crops images to bounding box. -#' -#' @description Creates an image data generator that crops images based on bounding box coordinates. -#' -#' @param files a vector of file names -#' @param boxes a data frame or matrix of bounding box coordinates in the format left, top, width, height. -#' @param resize_height the height the cropped image will be resized to. -#' @param resize_width the width the cropped image will be resized to. -#' @param standardize standardize the image to the range 0 to 1, TRUE or FALSE. -#' @param batch the batch size for the image generator. -#' -#' @return A Tensorflow image data generator. -#' @export -#' @import tensorflow -#' -#' @examples -#' \dontrun{#' dataset <- cropImageGenerator(images, boxes, standardize = FALSE, batch = batch)} -cropImageGenerator <- function(files, boxes, resize_height = 456, resize_width = 456, standardize = FALSE, batch = 32) { - # create data generator for training (image/label pair) - if (!(is.vector(files) && inherits(files,"character"))) { - stop("files needs to be a vector of file names.\n") - } - if (ncol(boxes) != 4) { - stop("boxes must have four columns.\n") - } - if (sum(apply(boxes, 2, is.numeric)) != 4) { - stop("boxes must be numeric.\n") - } - if (length(files) != nrow(boxes)) { - stop("boxes must have the same number of rows as the length of files.\n") - } - - data <- data.frame(file = files, boxes) - dataset <- tfdatasets::tensor_slices_dataset(data) - dataset <- tfdatasets::dataset_map(dataset, function(x) loadImageResizeCrop(x, resize_height, resize_width, standardize),num_parallel_calls = tf$data$experimental$AUTOTUNE) - dataset <- tfdatasets::dataset_batch(dataset, batch, num_parallel_calls = tf$data$experimental$AUTOTUNE,deterministic=TRUE) - dataset <- tfdatasets::dataset_prefetch(dataset, buffer_size = tf$data$experimental$AUTOTUNE) - # dataset<-dataset$apply(tf$data$experimental$ignore_errors()) - dataset <- reticulate::as_iterator(dataset) - dataset -} - - -#' @title Tensorflow data generator for training that crops images to bounding box. -#' -#' @description Creates an image data generator that crops images based on bounding box coordinates and returnes an image/label pair. -#' -#' @param files a vector of file names -#' @param boxes a data frame or matrix of bounding box coordinates in the format left, top, width, height. -#' @param label a vector of labels -#' @param classes a vector of all classes for the active model -#' @param resize_height the height the cropped image will be resized to. 
-#' @param resize_width the width the cropped image will be resized to. -#' @param standardize standardize the image to the range 0 to 1, TRUE or FALSE. -#' @param augmentation_color use data augmentation to change the color, TRUE or FALSE. -#' @param augmentation_geometry use data augmentation to change the geometry of the images, TRUE or FALSE. -#' @param shuffle return data pairas in random order, TRUE or FALSE. -#' @param cache use caching to reduce reading from disk, TRUE or FALSE. -#' @param cache_dir directory used for caching, if none provided chaching will be done in memory. -#' @param return_iterator Should an iterator be returned? If RALSE a tfdataset will be returned. -#' @param batch the batch size for the image generator. -#' -#' @return A Tensorflow image data generator. -#' @export -#' @import tensorflow -#' -#' @examples -#' \dontrun{ -#' dataset <- cropImageTrainGenerator(images, standardize = FALSE, batch = batch)} -cropImageTrainGenerator <- function(files, boxes, label, classes, - resize_height = 456, resize_width = 456, - standardize = FALSE, augmentation_color=FALSE, - augmentation_geometry=FALSE, shuffle=FALSE, - cache=FALSE, cache_dir=NULL, return_iterator=FALSE, batch = 32) { - # create data generator for training (image/label pair) - if (!(is.vector(files) && inherits(files,"character"))) { - stop("files needs to be a vector of file names.\n") - } - if (ncol(boxes) != 4) { - stop("boxes must have four columns.\n") - } - if (sum(apply(boxes, 2, is.numeric)) != 4) { - stop("boxes must be numeric.\n") - } - if (length(files) != nrow(boxes)) { - stop("boxes must have the same number of rows as the length of files.\n") - } - if(!is.null(cache_dir) && !dir.exists(cache_dir)){ - stop("cache directory does not exist.\n") - } - data <- data.frame(file = files, boxes,label) - rng<-tf$random$Generator$from_seed(as.integer(123),alg='philox') - auggeo<-imageAugmentationGeometry() - dataset <- tfdatasets::tensor_slices_dataset(data) - if(shuffle) - dataset <- tfdatasets::dataset_shuffle(dataset,buffer_size=nrow(data), reshuffle_each_iteration=TRUE) - dataset <- tfdatasets::dataset_map(dataset, function(x) imageLabelCrop(x, classes,resize_height, resize_width, standardize),num_parallel_calls = tf$data$experimental$AUTOTUNE) - if(cache){ - if(is.null(cache_dir)){ - dataset <- tfdatasets::dataset_cache(dataset) - }else{ - dataset <- tfdatasets::dataset_cache(dataset,cache_dir) - } - } - dataset <- tfdatasets::dataset_repeat(dataset) - if(augmentation_geometry) - dataset <- tfdatasets::dataset_map(dataset,function(x,y)list(auggeo(x,training=TRUE),y),num_parallel_calls = tf$data$experimental$AUTOTUNE) - if(augmentation_color) - dataset <- tfdatasets::dataset_map(dataset,function(x,y)imageAugmentationColor(x,y,rng),num_parallel_calls = tf$data$experimental$AUTOTUNE) - dataset <- tfdatasets::dataset_batch(dataset, batch, num_parallel_calls = tf$data$experimental$AUTOTUNE,deterministic=TRUE) - # dataset<-dataset$apply(tf$data$experimental$ignore_errors()) - dataset <- tfdatasets::dataset_prefetch(dataset, buffer_size = tf$data$experimental$AUTOTUNE) - if(return_iterator) - dataset <- reticulate::as_iterator(dataset) - dataset -} - - - - -#' @title Tensorflow data generator that resizes images. -#' -#' @description Creates an image data generator that resizes images if requested. -#' -#' @param files a vector of file names -#' @param resize_height the height the cropped image will be resized to. If NULL returns original size images. 
-#' @param resize_width the width the cropped image will be resized to. If NULL returns original size images.. -#' @param standardize standardize the image to the range 0 to 1, TRUE or FALSE. -#' @param batch the batch size for the image generator. -#' -#' @return A Tensorflow image data generator. -#' @export -#' @import tensorflow -#' -#' @examples -#' \dontrun{ -#' dataset <- ImageGenerator(images, standardize = FALSE, batch = batch) -#' } -ImageGenerator <- function(files, resize_height = NULL, resize_width = NULL, standardize = FALSE, batch = 1) { - # create data generator for training (image/label pair) - if (!(is.vector(files) && inherits(files,"character"))) { - stop("files needs to be a vector of file names.\n") - } - - data <- data.frame(file=files) - dataset <- tfdatasets::tensor_slices_dataset(files) - - if (is.null(resize_height) || is.null(resize_width)) { - message("No values were provided for resize, returning full-size images.") - dataset <- tfdatasets::dataset_map(dataset, function(x) loadImage(x, standardize=standardize), num_parallel_calls = tf$data$experimental$AUTOTUNE) - dataset <- tfdatasets::dataset_batch(dataset, batch, num_parallel_calls = tf$data$experimental$AUTOTUNE,deterministic=TRUE) - } - else { - dataset <- tfdatasets::dataset_map(dataset, function(x) loadImageResize(x, resize_height, resize_width, standardize=standardize),num_parallel_calls = tf$data$experimental$AUTOTUNE) - dataset <- tfdatasets::dataset_batch(dataset, batch, num_parallel_calls = tf$data$experimental$AUTOTUNE,deterministic=TRUE) - } - dataset <- tfdatasets::dataset_prefetch(dataset, buffer_size = tf$data$experimental$AUTOTUNE) - dataset <- reticulate::as_iterator(dataset) - dataset -} - - -#' @title Tensorflow data generator that resizes images and returns original image size. -#' -#' @description Creates an image data generator that resizes images if requested and also returns the original images size needed for MegaDetector. -#' -#' @param files a vector of file names -#' @param resize_height the height the cropped image will be resized to. If NULL returns original size images. -#' @param resize_width the width the cropped image will be resized to. If NULL returns original size images.. -#' @param pad pad the image instead of stretching it, TRUE or FALSE. -#' @param standardize standardize the image to the range 0 to 1, TRUE or FALSE. -#' @param batch the batch size for the image generator. -#' -#' @return A Tensorflow image data generator. 
-#' @export -#' @import tensorflow -#' -#' @examples -#' \dontrun{ -#' dataset <- ImageGenerator(images, standardize = FALSE, batch = batch) -#' } -ImageGeneratorSize <- function(files, resize_height = NULL, resize_width = NULL, pad=FALSE, standardize = FALSE, batch = 1) { - # create data generator for training (image/label pair) - if (!(is.vector(files) && inherits(files,"character"))) { - stop("Please provide a vector of file names.\n") - } - dataset <- tfdatasets::tensor_slices_dataset(files) - if (is.null(resize_height) || is.null(resize_width)) { - message("No values were provided for resize, returning full-size images.") - dataset <- tfdatasets::dataset_map(dataset, function(x) loadImage(x, standardize),num_parallel_calls = tf$data$experimental$AUTOTUNE) - dataset<-dataset$apply(tf$data$experimental$ignore_errors()) - dataset <- tfdatasets::dataset_batch(dataset, batch, num_parallel_calls = tf$data$experimental$AUTOTUNE,deterministic=TRUE) - } else { - dataset <- tfdatasets::dataset_map(dataset, function(x) loadImageResizeSize(x, height=resize_height, width=resize_width, pad=pad,standardize=standardize),num_parallel_calls = tf$data$experimental$AUTOTUNE) - dataset<-dataset$apply(tf$data$experimental$ignore_errors()) - dataset <- tfdatasets::dataset_batch(dataset, batch, num_parallel_calls = tf$data$experimental$AUTOTUNE,deterministic=TRUE) - } - dataset <- tfdatasets::dataset_prefetch(dataset, buffer_size = tf$data$experimental$AUTOTUNE) - dataset <- reticulate::as_iterator(dataset) - dataset -} - - -#' @title Load an image and return the full size image as an image tensor. -#' -#' @description Load an image and return the full size an image tensor. Internal function to be called by image generator function. -#' -#' @param file path to a JPEG file -#' @param standardize standardize the image, TRUE or FALSE. -#' -#' @return An image tensor. -#' @import tensorflow -loadImage <- function(file, standardize = FALSE) { - # catch error caused by missing files and zero-length files - if (is.null(tryCatch({image <- tf$io$read_file(file); - image <- tf$image$decode_jpeg(image, channels = 3, try_recover_truncated = T)},silent=T, error = function(e) NULL))) { - image <- tf$zeros(as.integer(c(299, 299, 3)), dtype = tf$uint8) - } - if (!standardize) image <- tf$image$convert_image_dtype(image, dtype = tf$uint8) - if (standardize) image <- tf$image$convert_image_dtype(image, dtype = tf$float32) - image -} - -#' @title Load and resize an image and return an image tensor. -#' -#' @description Load and resize an image and return an image tensor. Internal function to be called by image generator function. -#' -#' @param file path to a JPEG file -#' @param height the height the cropped image will be resized to. -#' @param width the width the cropped image will be resized to. -#' @param pad logical indicating whether the images should be padded or streched. -#' @param standardize standardize the image, TRUE or FALSE. -#' -#' @return An image tensor. 
-#' @import tensorflow -loadImageResize <- function(file, height = 299, width = 299, pad=FALSE,standardize = FALSE) { - size <- as.integer(c(height, width)) - - # catch error caused by missing files and zero-length files - if (!is.null(tryCatch({image <- tf$io$read_file(file); - image <- tf$image$decode_jpeg(image, channels = 3, try_recover_truncated = T)},silent=T, error = function(e) NULL))) { - image <- tf$image$convert_image_dtype(image, dtype = tf$float32) - if(pad==TRUE){ - image<-tf$image$resize_with_pad(image, as.integer(height), as.integer(width), method = "bilinear") - }else{ - image<-tf$image$resize(image,size = size) - } - } else { - image <- tf$zeros(as.integer(c(height, width, 3)), dtype = tf$float32) - } - if (!standardize) image <- tf$image$convert_image_dtype(image, dtype = tf$uint8) - image -} - - -#' @title Load and resize an image and return an image tensor as well as a tensor with the original image size. -#' -#' @description Load and resize an image and return an image tensor as well as a tensor with the original image size. Internal function to be called by image generator function. -#' -#' @param file path to a JPEG file -#' @param height the height the cropped image will be resized to. -#' @param width the width the cropped image will be resized to. -#' @param pad pad the image instead of stretching it, TRUE or FALSE. -#' @param standardize standardize the image, TRUE or FALSE. -#' -#' @return An image tensor. -#' @import tensorflow -loadImageResizeSize <- function(file, height = 299, width = 299, pad=FALSE,standardize = FALSE) { - # catch error caused by missing files and zero-length files - if (!is.null(tryCatch({image <- tf$io$read_file(file); - image <- tf$image$decode_jpeg(image, channels = 3, try_recover_truncated = T)},silent=T, error = function(e) NULL))) { - size <- as.integer(c(height, width)) - imgdim <- tf$cast(tf$unstack(tf$shape(image)), tf$float32) - img_height<- tf$cast(imgdim[[0]], tf$int32) - img_width <- tf$cast(imgdim[[1]], tf$int32) - image <- tf$image$convert_image_dtype(image, dtype = tf$float32) - if(pad==TRUE){ - image<-tf$image$resize_with_pad(image, as.integer(height), as.integer(width), method = "bilinear") - }else{ - image<-tf$image$resize(image,size = size) - } - if (!standardize) image <- tf$image$convert_image_dtype(image, dtype = tf$uint8) - image<-list(image=image,width=img_width,height=img_height,file=file) - } else { - image<-list(image = tf$zeros(as.integer(c(height, width, 3)), dtype = tf$float32),width=tf$cast(width, tf$int32),height=tf$cast(height, tf$int32),file=file) - } - image -} - - -#' @title Load, resize and crop an image and return an image tensor. -#' -#' @description Load a JPEG image and crop it to a bounding box. Internal function to be called by image generator function. -#' -#' @param data a list with the first element being a path to an image file and the next four arguments being the bounding box coordinates. -#' @param height the height the cropped image will be resized to. -#' @param width the width the cropped image will be resized to. -#' @param standardize standardize the image, TRUE or FALSE. -#' -#' @return A Tensorflow image data generator. 
-#' @import tensorflow -loadImageResizeCrop <- function(data, height = 299, width = 299, standardize = FALSE) { - # catch error caused by missing files and zero-length files - if (!is.null(tryCatch({image <- tf$io$read_file(data[[1]]); - image <- tf$image$decode_jpeg(image, channels = 3, try_recover_truncated = T)},silent=T, error = function(e) NULL))) { - imgdim <- tf$cast(tf$unstack(tf$shape(image)), tf$float32) - img_height<- tf$cast(imgdim[[1]], tf$int32) - img_width <- tf$cast(imgdim[[0]], tf$int32) - crop_top <- tf$cast(imgdim[[1]] * data[[2]], tf$int32) - crop_left <- tf$cast(imgdim[[0]] * data[[3]], tf$int32) - crop_height <- tf$cast(imgdim[[1]] * data[[4]], tf$int32) - crop_width <- tf$cast(imgdim[[0]] * data[[5]], tf$int32) - crop_height <- tf$cond(tf$greater((crop_top+crop_height),img_height),function()tf$cast(img_height-crop_top, tf$int32),function()crop_height) - crop_width <- tf$cond(tf$greater((crop_left+crop_width),img_width),function()crop_height<-tf$cast(img_width-crop_left, tf$int32),function()crop_width) - crop_height <- tf$cond(tf$equal(crop_height,as.integer(0)),function()tf$cast(1, tf$int32),function()crop_height) - crop_width <- tf$cond(tf$equal(crop_width,as.integer(0)),function()crop_height<-tf$cast(1, tf$int32),function()crop_width) - image <- tf$image$convert_image_dtype(image, dtype = tf$float32) - image <- tf$image$crop_to_bounding_box(image, crop_left, crop_top, crop_width, crop_height) - image <- tf$image$resize_with_pad(image, as.integer(height), as.integer(width), method = "bilinear") - } else { - image <- tf$zeros(as.integer(c(height, width, 3)), dtype = tf$float32) - } - if (!standardize) image <- tf$image$convert_image_dtype(image, dtype = tf$uint8) - image -} - - -#' @title Load image and return a tensor with an image and a corresponding label. -#' -#' @description Load image and return a tensor with an image and a corresponding label. Internal function to be called by image generator function. -#' -#' @param data a list with the first element being an image file path and the second element a label. -#' @param classes list of classes -#' @param height the height the cropped image will be resized to. -#' @param width the width the cropped image will be resized to. -#' @param standardize standardize the image, TRUE or FALSE. -#' -#' @return An image and label tensor. -#' @import tensorflow -imageLabel <- function(data, classes, height = 299, width = 299, standardize = FALSE) { - image <- loadImageResize(data[[1]], height, width, standardize) - list(image, tf$cast(classes==data[[6]],tf$int16)) -} - - - -#' @title Load image, crop and return a tensor with an image and a corresponding label. -#' -#' @description Load image, crop and return a tensor with an image and a corresponding label. Internal function to be called by image generator function. -#' -#' @param data a list with the first element being an image file path, the next four elements being the bounding box coordinates and the last element a label -#' @param classes list of classes -#' @param height the height the cropped image will be resized to. -#' @param width the width the cropped image will be resized to. -#' @param standardize standardize the image, TRUE or FALSE. -#' -#' @return An image and label tensor. 
-#' @import tensorflow -imageLabelCrop <- function(data, classes, height = 299, width = 299, standardize = FALSE) { - image <- loadImageResizeCrop(list(data[[1]],data[[2]],data[[3]],data[[4]], data[[5]]), height, width, standardize) - list(image, tf$cast(classes==data[[6]],tf$int16)) -} - - -#' @title Perform image augmentation through random color adjustments on an image/label pair. -#' -#' @description Performs image augmentation on a image/label pair for training. Uses random brightness,contrast,saturation, and hue. -#' -#' @param image an image tensor. -#' @param label a label tensor. -#' @param rng a random number generator use to generate a random seed. -#' -#' @return An image and label tensor. -#' @import tensorflow -imageAugmentationColor<-function(image,label,rng){ - seed=rng$make_seeds(as.integer(2)) - seed=seed[1,] - image<-tf$image$stateless_random_brightness(image,0.2,seed) - image<-tf$image$stateless_random_contrast(image,0.3,0.7,seed) - image<-tf$image$stateless_random_saturation(image,0.5,2,seed) - image<-tf$image$stateless_random_hue(image,0.2,seed) - list(image,label) -} - -#' @title Perform random geometric transformations on an image. -#' -#' @description Returns a keras model that performs random geometric transformations on an image. -#' -#' @return A keras model. -#' @import keras -imageAugmentationGeometry<-function(){ - model<-keras_model_sequential() - model<-layer_random_flip(model,mode="horizontal") - model<-layer_random_zoom(model,c(0,-.2),c(0,-0.2),fill_mode="constant") - model<-layer_random_rotation(model,c(-0.1,0.1),fill_mode="constant") - model -} \ No newline at end of file diff --git a/R/old/loadMDModel.R b/R/old/loadMDModel.R deleted file mode 100644 index de64a4a..0000000 --- a/R/old/loadMDModel.R +++ /dev/null @@ -1,34 +0,0 @@ -############################################# -#' Load MegaDetector model file from directory or file -#' -#' @param modelfile .pb file or directory obtained from megaDetector -#' -#' @return a tfsession containing the MD model -#' @export -#' @import tensorflow -#' -#' @examples -#' \dontrun{ -#' mdmodel <- "megadetector_v4.1.pb" -#' mdsession <- loadMDModel(mdmodel) -#' } -loadMDModel <- function(modelfile) { - if (dir.exists(modelfile) && file.exists(paste0(modelfile, "/saved_model.pb"))) { - model <- tf$keras$models$load_model(modelfile) - class(model) <- append(class(model), "mdmodel") - model - } - else { - if (!file.exists(modelfile)) { - stop("The given MD model does not exist. 
Check path.") - } - tfsession <- tf$compat$v1$Session() - f <- tf$io$gfile$GFile(modelfile, "rb") - tfgraphdef <- tf$compat$v1$GraphDef() - tfgraphdef$ParseFromString(f$read()) - tfsession$graph$as_default() - tf$import_graph_def(tfgraphdef, name = "") - class(tfsession) <- append(class(tfsession), "mdsession") - tfsession - } -} \ No newline at end of file diff --git a/R/old/megadetector.R b/R/old/megadetector.R deleted file mode 100644 index b85a36e..0000000 --- a/R/old/megadetector.R +++ /dev/null @@ -1,35 +0,0 @@ -############################################# -#' Load MegaDetector model file from directory or file -#' -#' @param modelfile .pb file or directory obtained from megaDetector -#' -#' @return a tfsession containing the MD model -#' @export -#' @import tensorflow -#' -#' @examples -#' \dontrun{ -#' mdmodel <- "megadetector_v4.1.pb" -#' mdsession <- MegaDetector(mdmodel) -#' } -MegaDetector <- function(model_path) { - # converted keras folder - if (dir.exists(modelfile) && file.exists(paste0(modelfile, "/saved_model.pb"))) { - model <- tf$keras$models$load_model(modelfile) - class(model) <- append(class(model), "mdmodel") - model - } - else { - if (!file.exists(modelfile)) { - stop("The given MD model does not exist. Check path.") - } - tfsession <- tf$compat$v1$Session() - f <- tf$io$gfile$GFile(modelfile, "rb") - tfgraphdef <- tf$compat$v1$GraphDef() - tfgraphdef$ParseFromString(f$read()) - tfsession$graph$as_default() - tf$import_graph_def(tfgraphdef, name = "") - class(tfsession) <- append(class(tfsession), "mdsession") - tfsession - } -} \ No newline at end of file diff --git a/R/old/parseMDjson.R b/R/old/parseMDjson.R deleted file mode 100644 index 6fd5c19..0000000 --- a/R/old/parseMDjson.R +++ /dev/null @@ -1,48 +0,0 @@ -############################################# -#' converte the JSON file produced bye the -#' Python version of MegaDetector into the format -#' produced by detectObjectBatch -#' -#' @param json json data in a list format -#' -#' @return a list of MegaDetector results -#' -#' @examples -#' \dontrun{ -#' mdresults <- parseMDjson(json) -#' } -parseMDjson<-function(json){ - results<-json[[1]] - delete<-numeric() - opb<-pbapply::pboptions(char = "=") - pb <-pbapply::startpb(1,length(results)) #txtProgressBar(min = 0, max = length(results), style = 3) - for(n in 1:length(results)){ - if(!is.null(results[[n]][['failure']])){ - delete<-c(delete,n) - }else{ - if(length(results[[n]]$detections)>0){ - detections<-data.frame(category=character(),conf=numeric(),bbox1=numeric(),bbox2=numeric(),bbox3=numeric(),bbox4=numeric(),stringsAsFactors = F) - for(m in 1:length(results[[n]]$detections)){ - detections<-rbind(detections,data.frame(category=as.character(results[[n]]$detections[[m]]$category), - conf=results[[n]]$detections[[m]]$conf, - bbox1=results[[n]]$detections[[m]]$bbox[[1]], - bbox2=results[[n]]$detections[[m]]$bbox[[2]], - bbox3=results[[n]]$detections[[m]]$bbox[[3]], - bbox4=results[[n]]$detections[[m]]$bbox[[4]],stringsAsFactors=F)) - } - results[[n]]$max_detection_category<-detections$category[which(detections$conf==max(detections$conf))][1] - results[[n]]$detections<-detections - }else{ - results[[n]]$max_detection_category<-"0" - } - results[[n]]<-results[[n]][c(1,2,4,3)] - } - if((n %% round(length(results)/100,0)) ==0) pbapply::setpb(pb,n)#setTxtProgressBar(pb, n) - } - pbapply::setpb(pb,length(results)) - pbapply::closepb(pb) - if(length(delete)>0) - results[-delete] - else - results -} diff --git a/R/old/predictSpecies.R b/R/old/predictSpecies.R 
b/R/old/predictSpecies.R
deleted file mode 100644
index 187e6e8..0000000
--- a/R/old/predictSpecies.R
+++ /dev/null
@@ -1,47 +0,0 @@
-#' Classifies Crops Using Specified Models
-#'
-#' @param input either dataframe with MD crops or list of filenames
-#' @param model models with which to classify species
-#' @param resize resize images before classification, defaults to 299x299px
-#' @param standardize standardize images, defaults to FALSE
-#' @param batch number of images processed in each batch (keep small)
-#' @param workers number of cores
-#'
-#' @return a matrix of likelihoods for each class for each image
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' pred <- classifySpecies(imagesallanimal, paste0(modelfile, ".h5"),
-#'                         resize = 456, standardize = FALSE, batch_size = 64, workers = 8)
-#' }
-predictSpecies <- function(input, model, resize = 456, standardize = FALSE,
-                           batch = 1, workers = 1) {
-
-  if (!file.exists(model)) { stop("The given model file does not exist.") }
-
-  model <- keras::load_model_hdf5(model)
-
-  #crops
-  if(is(input, "data.frame")){
-    steps <- ceiling(nrow(input) / batch)
-    filecol <- which(colnames(input) %in% c("file", "Frame"))[1]
-    if("bbox1" %in% colnames(input)){
-      dataset <- cropImageGenerator(input[, filecol], input[, c("bbox1", "bbox2", "bbox3", "bbox4")],
-                                    resize_height = resize, resize_width = resize,
-                                    standardize = standardize, batch = batch)
-    }else{
-      dataset <- ImageGenerator(input[, filecol], resize_height = resize, resize_width = resize,
-                                standardize = standardize, batch = batch)
-    }
-
-  }
-  else if (is.vector(input)) {
-    steps <- ceiling(length(input) / batch)
-    dataset <- ImageGenerator(input, resize_height = resize, resize_width = resize,
-                              standardize = standardize, batch = batch)
-  }
-  else { stop("Input must be a data frame of crops or vector of file names.") }
-
-  stats::predict(model, dataset, step = steps, workers = workers, verbose = 1)
-}
\ No newline at end of file
diff --git a/R/old/setupEnvironment.R b/R/old/setupEnvironment.R
deleted file mode 100644
index c1f3bb9..0000000
--- a/R/old/setupEnvironment.R
+++ /dev/null
@@ -1,19 +0,0 @@
-#' Setup Conda Environment
-#'
-#' @param config environment config yaml file
-#'
-#' @return none
-#' @import reticulate
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' setupEnv()
-#' }
-setupEnv <- function(config = "animl/animlenv.yml"){
-  try(
-    reticulate::conda_create(environment = config),
-    silent=TRUE
-  )
-  reticulate::use_condaenv("animlenv")
-}
diff --git a/R/old/zzz.R b/R/old/zzz.R
deleted file mode 100644
index 1e6492a..0000000
--- a/R/old/zzz.R
+++ /dev/null
@@ -1,30 +0,0 @@
-.onLoad <- function(libname, pkgname) {
-  message("Loading animl R package...")
-  requireNamespace("reticulate", quietly = TRUE)
-
-  if (is.null(reticulate::miniconda_path())) {
-    message("Miniconda not found. You will be prompted to install and accept the TOS.")
-    # This will prompt user to accept when called:
-    reticulate::install_miniconda()
-  }
-
-  # animl_env does not exist, create
-  if (!reticulate::condaenv_exists("animl_env")){
-    message("animl_env not found, installing... 
") - animl_install() - } - # use animl_env - Sys.unsetenv("RETICULATE_PYTHON") - reticulate::use_condaenv("animl_env", required = TRUE) - - # animl-py not available in environment, create - if(!reticulate::py_module_available("animl")){ animl_install() } - - - animl_py <<- reticulate::import("animl") - # if (animl_py$`__version__` != ANIML_VERSION){ -# animl_update() -# } - - message("animl-py loaded successfully.") -} diff --git a/R/reid.R b/R/reid.R index 88da1d5..3ba0b24 100644 --- a/R/reid.R +++ b/R/reid.R @@ -111,7 +111,9 @@ compute_distance_matrix <- function(input1, input2, metric='euclidean'){ #' @export #' #' @examples -#' \dontrun{dist_matrix <- compute_batched_distance_matrix(query_embeddings, database_embeddings, metric='cosine', batch_size=12)} +#' \dontrun{ +#' dist_matrix <- compute_batched_distance_matrix(query_embeddings, database_embeddings, +#' metric='cosine', batch_size=12)} compute_batched_distance_matrix <- function(input1, input2, metric='cosine', batch_size=10){ animl_py <- get("animl_py", envir = parent.env(environment())) animl_py$compute_batched_distance_matrix(input1, input2, metric=metric, batch_size=batch_size) diff --git a/R/zzz.R b/R/zzz.R index 1c05b50..843c83f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,5 +1,4 @@ .onLoad <- function(libname, pkgname) { - message("Loading animl package...") animl_env_avail <- animl_install() if (animl_env_avail) { animl_py <- load_animl_py() diff --git a/inst/animlenv.yml b/inst/animlenv.yml deleted file mode 100644 index dd2ea48..0000000 --- a/inst/animlenv.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: animlenv -channels: - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - ca-certificates=2022.10.11=h06a4308_0 - - certifi=2022.12.7=py38h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.3=he6710b0_2 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - ncurses=6.3=h5eee18b_3 - - openssl=1.1.1s=h7f8727e_0 - - pip=22.1.2=py38h06a4308_0 - - python=3.8.13=h12debd9_0 - - pytz=2022.7=py38h06a4308_0 - - readline=8.1.2=h7f8727e_1 - - setuptools=63.4.1=py38h06a4308_0 - - sqlite=3.39.2=h5082296_0 - - tk=8.6.12=h1ccaba5_0 - - wheel=0.37.1=pyhd3eb1b0_0 - - xz=5.2.5=h7f8727e_1 - - zlib=1.2.12=h7f8727e_2 - - pip: - - absl-py==1.2.0 - - astunparse==1.6.3 - - cachetools==5.2.0 - - charset-normalizer==2.1.1 - - flatbuffers==1.12 - - future==0.18.2 - - gast==0.4.0 - - google-auth==2.11.0 - - google-auth-oauthlib==0.4.6 - - google-pasta==0.2.0 - - grpcio==1.47.0 - - h5py==3.7.0 - - idna==3.3 - - importlib-metadata==4.12.0 - - keras==2.9.0 - - keras-preprocessing==1.1.2 - - libclang==14.0.6 - - markdown==3.4.1 - - markupsafe==2.1.1 - - numpy==1.23.2 - - oauthlib==3.2.0 - - opt-einsum==3.3.0 - - packaging==21.3 - - panoptes-client==1.6.0 - - protobuf==3.19.4 - - pyasn1==0.4.8 - - pyasn1-modules==0.2.8 - - pyparsing==3.0.9 - - python-dateutil==2.8.2 - - python-magic==0.4.27 - - redo==2.0.4 - - requests==2.28.1 - - requests-oauthlib==1.3.1 - - rsa==4.9 - - six==1.16.0 - - tensorboard==2.9.1 - - tensorboard-data-server==0.6.1 - - tensorboard-plugin-wit==1.8.1 - - tensorflow-estimator==2.9.0 - - tensorflow-gpu==2.9.1 - - tensorflow-io-gcs-filesystem==0.26.0 - - termcolor==1.1.0 - - typing-extensions==4.3.0 - - urllib3==1.26.12 - - werkzeug==2.2.2 - - wrapt==1.14.1 - - zipp==3.8.1 - diff --git a/man/classify.Rd b/man/classify.Rd index facd124..03720dd 100644 --- a/man/classify.Rd +++ b/man/classify.Rd @@ -15,7 +15,7 @@ classify( resize_width = 480, resize_height = 
480,
   batch_size = 1,
-  workers = 1
+  num_workers = 1
 )
 }
 \arguments{
diff --git a/man/compute_batched_distance_matrix.Rd b/man/compute_batched_distance_matrix.Rd
index 78e59b7..61c49fd 100644
--- a/man/compute_batched_distance_matrix.Rd
+++ b/man/compute_batched_distance_matrix.Rd
@@ -27,5 +27,7 @@ distance matrix
 Computes the distance matrix in a batched manner to save memory.
 }
 \examples{
-\dontrun{dist_matrix <- compute_batched_distance_matrix(query_embeddings, database_embeddings, metric='cosine', batch_size=12)}
+\dontrun{
+dist_matrix <- compute_batched_distance_matrix(query_embeddings, database_embeddings,
+                                               metric='cosine', batch_size=12)}
 }
diff --git a/man/load_detector.Rd b/man/load_detector.Rd
index 1dc1121..69090e2 100644
--- a/man/load_detector.Rd
+++ b/man/load_detector.Rd
@@ -20,5 +20,6 @@ megadetector object
 Load an Object Detector
 }
 \examples{
-\dontrun{md_py <- megadetector("/mnt/machinelearning/megaDetector/md_v5a.0.0.pt", model_type='mdv5', device = 'cuda:0')}
+\dontrun{md_py <- megadetector("/mnt/machinelearning/megaDetector/md_v5a.0.0.pt",
+                               model_type='mdv5', device='cuda:0')}
 }

From 9481902d4a40397f65e4df995618b1b9fad5862b Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Wed, 19 Nov 2025 17:10:11 -0800
Subject: [PATCH 02/12] updates for animl-py 3.1

---
 NAMESPACE                      |  2 +
 R/classification.R             | 24 +++++-----
 R/detection.R                  |  4 +-
 R/export.R                     |  9 ++--
 R/file_management.R            | 23 +++++----
 R/install.R                    | 31 ++++++++++--
 R/plot_boxes.R                 | 57 ----------------------
 R/video_processing.R           | 36 +++++++++-----
 R/visualization.R              | 70 +++++++++++++++++++++++++++
 examples/Workflow_reid.R       | 87 ++++++++++++++++++++++++++++++++++
 man/check_file.Rd              |  4 +-
 man/classify.Rd                | 24 +++++-----
 man/detect.Rd                  |  2 +-
 man/export_timelapse.Rd        |  8 ++--
 man/extract_frames.Rd          | 21 ++++----
 man/get_frame_as_image.Rd      | 22 +++++++++
 man/load_classifier.Rd         |  4 +-
 man/parse_detections.Rd        |  2 +-
 man/plot_all_bounding_boxes.Rd | 17 +++++--
 man/plot_box.Rd                | 23 +++++++--
 man/plot_from_file.Rd          |  8 ++--
 man/update_animl_py.Rd         | 22 +++++++++
 22 files changed, 354 insertions(+), 146 deletions(-)
 delete mode 100644 R/plot_boxes.R
 create mode 100644 R/visualization.R
 create mode 100644 examples/Workflow_reid.R
 create mode 100644 man/get_frame_as_image.Rd
 create mode 100644 man/update_animl_py.Rd

diff --git a/NAMESPACE b/NAMESPACE
index f1bcad2..12df11f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,6 +20,7 @@ export(extract_frames)
 export(extract_miew_embeddings)
 export(get_animals)
 export(get_empty)
+export(get_frame_as_image)
 export(list_models)
 export(load_animl_py)
 export(load_class_list)
@@ -39,5 +40,6 @@ export(single_classification)
 export(test_main)
 export(train_main)
 export(train_val_test)
+export(update_animl_py)
 export(update_labels_from_folders)
 importFrom(methods,is)
diff --git a/R/classification.R b/R/classification.R
index 7db4e22..860e72c 100644
--- a/R/classification.R
+++ b/R/classification.R
@@ -1,7 +1,7 @@
 #' Load a Classifier Model with animl-py
 #'
 #' @param model_path path to model
-#' @param len_classes path to class list
+#' @param classes path to class list or loaded class list
 #' @param device send model to the specified device
 #' @param architecture model architecture
 #'
@@ -12,9 +12,9 @@
 #' \dontrun{
 #' classes <- load_class_list('sdzwa_andes_v1_classes.csv')
 #' andes <- load_classifier('andes_v1.pt', classes)}
-load_classifier <- function(model_path, len_classes, device=NULL, architecture="CTL"){
+load_classifier <- function(model_path, classes, device=NULL, architecture="CTL"){
   animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$load_classifier(model_path, as.integer(len_classes), device=device, architecture=architecture)
+  animl_py$load_classifier(model_path, classes, device=device, architecture=architecture)
 }
@@ -55,25 +55,27 @@
 #'
 #' @param model loaded classifier model
 #' @param detections manifest of animal detections
-#' @param device send model to the specified device
-#' @param out_file path to csv to save results to
+#' @param resize_width image width input size
+#' @param resize_height image height input size
 #' @param file_col column in manifest containing file paths
 #' @param crop use bbox to crop images before feeding into model
 #' @param normalize normalize the tensor before inference
-#' @param resize_width image width input size
-#' @param resize_height image height input size
 #' @param batch_size batch size for generator
-#' @param num_workers number of processes
+#' @param num_workers number of processes
+#' @param device send model to the specified device
+#' @param out_file path to csv to save results to
 #'
 #' @return detection manifest with added prediction and confidence columns
 #' @export
 #'
 #' @examples
 #' \dontrun{animals <- classify(classifier, animals, file_col='filepath')}
-classify <- function(model, detections, device=NULL, out_file=NULL,
-                     file_col='frame', crop=TRUE, normalize=TRUE,
+classify <- function(model, detections,
                      resize_width=480, resize_height=480,
-                     batch_size=1, num_workers=1){
+                     file_col='filepath', crop=TRUE, normalize=TRUE,
+                     batch_size=1, num_workers=1,
+                     device=NULL, out_file=NULL){
+
   animl_py <- get("animl_py", envir = parent.env(environment()))
   animl_py$classify(model, detections, device=device, out_file=out_file,
                     file_col=file_col, crop=crop, normalize=normalize,
diff --git a/R/detection.R b/R/detection.R
index 5b505c9..729b85d 100644
--- a/R/detection.R
+++ b/R/detection.R
@@ -37,7 +37,7 @@
 #' @examples
 #' \dontrun{mdres <- detect(md_py, allframes$Frame, 1280, 960, device='cpu')}
 detect <- function(detector, image_file_names, resize_width, resize_height,
-                   letterbox=TRUE, confidence_threshold=0.1, file_col='frame',
+                   letterbox=TRUE, confidence_threshold=0.1, file_col='filepath',
                    batch_size=1, num_workers=1, device=NULL,
                    checkpoint_path=NULL, checkpoint_frequency=-1){
   animl_py <- get("animl_py", envir = parent.env(environment()))
@@ -66,7 +66,7 @@
 #' \dontrun{
 #' mdresults <- parseMD(mdres)
 #' }
-parse_detections <- function(results, manifest=NULL, out_file=NULL, threshold=0, file_col="frame") {
+parse_detections <- function(results, manifest=NULL, out_file=NULL, threshold=0, file_col="filepath") {
   animl_py <- get("animl_py", envir = parent.env(environment()))
   animl_py$parse_detections(results, manifest=manifest, out_file=out_file,
                             threshold=threshold, file_col=file_col)
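
For reference, a minimal sketch of the detector flow under the renamed file_col='filepath' defaults introduced above; the model path and output file name are hypothetical:

md_py <- load_detector("~/models/md_v5a.0.0.pt", model_type = "mdv5")
mdraw <- detect(md_py, manifest, 1280, 1280, batch_size = 4)
dets  <- parse_detections(mdraw, manifest = manifest, out_file = "Detections.csv")
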
diff --git a/R/export.R b/R/export.R
index 6454fde..91ba886 100644
--- a/R/export.R
+++ b/R/export.R
@@ -92,9 +92,8 @@
 #' Converts the Manifests to a csv file that contains columns needed for TimeLapse conversion in later step
 #'
-#' @param animals a DataFrame that has entries of anuimal classification
-#' @param empty a DataFrame that has detection of non-animal objects in images
-#' @param imagedir location of root directory where all images are stored (can contain subdirectories)
+#' @param results a DataFrame that has entries of animal classification
+#' @param image_dir location of root directory where all images are stored (can contain subdirectories)
 #' @param only_animal A bool that confirms whether we want only animal detections or all
 #'
 #' @returns animals.csv, non-anim.csv, csv_loc
 #'
 #' @examples
 #' \dontrun{export_timelapse(results, '/path/to/images/')}
-export_timelapse <- function(animals, empty, imagedir, only_animal=TRUE){
+export_timelapse <- function(results, image_dir, only_animal=TRUE){
   animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$export_timelapse(animals, empty, imagedir, only_animal=only_animal)
+  animl_py$export_timelapse(results, image_dir, only_animal=only_animal)
 }
diff --git a/R/file_management.R b/R/file_management.R
index 6621ea9..d53f4a7 100644
--- a/R/file_management.R
+++ b/R/file_management.R
@@ -48,23 +48,20 @@ WorkingDirectory <- function(workingdir, pkg.env) {
   # Assign specific directory paths
   basedir <- paste0(workingdir, "Animl-Directory/")
-  pkg.env$datadir <- paste0(basedir, "Data/")
-  pkg.env$vidfdir <- paste0(basedir, "Frames/")
   pkg.env$linkdir <- paste0(basedir, "Sorted/")
   pkg.env$visdir <- paste0(basedir, "Plots/")

   # Create directories if they do not already exist
-  dir.create(pkg.env$datadir, recursive = T, showWarnings = F)
-  dir.create(pkg.env$vidfdir, recursive = T, showWarnings = F)
   dir.create(pkg.env$linkdir, recursive = T, showWarnings = F)
+  dir.create(pkg.env$visdir, recursive = T, showWarnings = F)

   # Assign specific file paths
-  pkg.env$filemanifest <- paste0(pkg.env$datadir, "FileManifest.csv")
-  pkg.env$imageframes <- paste0(pkg.env$datadir, "ImageFrames.csv")
-  pkg.env$results <- paste0(pkg.env$datadir, "Results.csv")
-  pkg.env$predictions <- paste0(pkg.env$datadir, "Predictions.csv")
-  pkg.env$detections <- paste0(pkg.env$datadir, "Detections.csv")
-  pkg.env$mdraw <- paste0(pkg.env$datadir, "MD_Raw.json")
+  pkg.env$filemanifest <- paste0(basedir, "FileManifest.csv")
+  pkg.env$imageframes <- paste0(basedir, "ImageFrames.csv")
+  pkg.env$results <- paste0(basedir, "Results.csv")
+  pkg.env$predictions <- paste0(basedir, "Predictions.csv")
+  pkg.env$detections <- paste0(basedir, "Detections.csv")
+  pkg.env$mdraw <- paste0(basedir, "MD_Raw.json")
 }
@@ -112,6 +109,7 @@ load_data <- function(file) {
 #' Check for files existence and prompt user if they want to load
 #'
 #' @param file the full path of the file to check
+#' @param output_type str to specify file name in prompt description
 #'
 #' @return a boolean indicating whether a file was found
 #' and the user wants to load or not
 #'
 #' @examples
 #' \dontrun{
 #' check_file("path/to/newfile.csv")
 #' }
-check_file <- function(file) {
+check_file <- function(file, output_type) {
   if (!is.null(file) && file.exists(file)) {
     date <- file.info(file)$mtime
     date <- strsplit(date, split = " ")[[1]][1]
-    if (tolower(readline(prompt = sprintf("Output file already exists and was last modified %s, would you like to load it? y/n: ", date)) == "y")) {
+    prompt <- sprintf("%s file already exists and was last modified %s, would you like to load it? y/n: ", output_type, date)
+    if (tolower(readline(prompt = prompt)) == "y") {
       return(TRUE)
     }
   }
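
For reference, a minimal sketch of how the reworked check_file() is meant to gate recomputation; the file name and output_type label are hypothetical:

if (check_file("Detections.csv", "Detection")) {
  dets <- load_data("Detections.csv")   # reuse the previously saved results
} else {
  dets <- parse_detections(mdraw, manifest = manifest, out_file = "Detections.csv")
}
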
y/n: ", output_type, date) + if (tolower(readline(prompt = prompt) == "y")) { return(TRUE) } } diff --git a/R/install.R b/R/install.R index 4bfa8cf..237e292 100644 --- a/R/install.R +++ b/R/install.R @@ -1,5 +1,5 @@ # VARIABLE FOR VERSION -ANIML_VERSION <- "3.0.1" +ANIML_VERSION <- "3.1.0" animl_py <- NULL #' Create a miniconda environment for animl and install animl-py @@ -42,6 +42,9 @@ animl_install <- function(py_env = "animl_env", } # conda env exists else{ + # check animl version + update_animl_py() + return(TRUE) } } @@ -65,6 +68,28 @@ load_animl_py <- function() { } +#' Update animl-py version +#' +#' @param py_env name of python environment +#' @param animl_version version of animl to install +#' +#' @returns None +#' @export +#' +#' @examples +#' \dontrun{update_animl_py(py_env = "animl_env", animl_version = ANIML_VERSION)} +update_animl_py <- function(py_env = "animl_env", + animl_version = ANIML_VERSION) { + # load animl-py, check version + animl_py <- reticulate::import("animl", delay_load = TRUE) + version_error <- try(animl_py$'__version__') + if (inherits(version_error, "try-error")){ + print("animl-py version: ", version_error) + reticulate::py_install(sprintf("animl==%s", animl_version), envname=py_env, pip=TRUE) + } +} + + #' Check that the python version is compatible with the current version of animl-py @@ -84,8 +109,8 @@ check_python <- function(initialize = TRUE) { "Please install Python befor running animl_initiaialzer().", "For more details run reticulate::py_discover_config()") } - if (utils::compareVersion(as.character(py_version), "3.9") == -1) { - stop("animl needs Python >=3.9") + if (utils::compareVersion(as.character(py_version), "3.12") == -1) { + stop("animl needs Python >=3.12") } packageStartupMessage(sprintf("Python version %s compatible with animl.", py_version)) } diff --git a/R/plot_boxes.R b/R/plot_boxes.R deleted file mode 100644 index aaeabf5..0000000 --- a/R/plot_boxes.R +++ /dev/null @@ -1,57 +0,0 @@ -#' Plot bounding boxes on image from md results -#' -#' @param rows row or rows of images in which the bounding box will be plotted -#' @param file_col Column name containing file paths -#' @param min_conf minimum confidence to plot box -#' @param prediction If True, display the prediction label alongside the bounding box. 
diff --git a/R/plot_boxes.R b/R/plot_boxes.R
deleted file mode 100644
index aaeabf5..0000000
--- a/R/plot_boxes.R
+++ /dev/null
@@ -1,57 +0,0 @@
-#' Plot bounding boxes on image from md results
-#'
-#' @param rows row or rows of images in which the bounding box will be plotted
-#' @param file_col Column name containing file paths
-#' @param min_conf minimum confidence to plot box
-#' @param prediction If True, display the prediction label alongside the bounding box.
-#'
-#' @return no return value, produces bounding box in plot panel
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' test_image <- classify(classifier_model, test_image, file_col='filepath')
-#' plot_box(test_image, file_col='filepath', minconf = 0.5, prediction=TRUE)
-#' }
-plot_box <- function(rows, file_col='filepath', min_conf = 0, prediction=FALSE) {
-  animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$plot_box(rows, file_col=file_col, min_conf=min_conf, prediction=prediction)
-}
-
-
-#' Plot all bounding boxes in a manifest
-#'
-#' @param manifest manifest of detections
-#' @param out_dir Name of the output directory
-#' @param file_col Column name containing file paths
-#' @param min_conf Confidence threshold to plot the box
-#' @param prediction flag determining whether prediction be printed alongside bounding box
-#'
-#' @return None
-#' @export
-#'
-#' @examples
-#' \dontrun{plot_all_bounding_boxes(manifest, 'Plots/''')}
-plot_all_bounding_boxes <- function(manifest, out_dir, file_col='frame',
-                                    min_conf=0.1, prediction=FALSE){
-  animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$plot_all_bounding_boxes(manifest, out_dir, file_col=file_col,
-                                   min_conf=min_conf, prediction=prediction)
-}
-
-
-#' Read a CSV manifest file and perform box plotting on the images.
-#'
-#' @param csv_file Path to the CSV file.
-#' @param output_dir Saved location of boxed images output dir.
-#'
-#' @return None
-#' @export
-#'
-#' @examples
-#' \dontrun{plot_from_file('manifest.csv', 'Plots/''')}
-plot_from_file <- function(csv_file, output_dir){
-  animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$plot_from_file(csv_file, output_dir)
-}
\ No newline at end of file
diff --git a/R/video_processing.R b/R/video_processing.R
index b1a21b2..583fb08 100644
--- a/R/video_processing.R
+++ b/R/video_processing.R
@@ -1,16 +1,13 @@
 #' Extract frames from video for classification
 #'
-#' This function can take
-#'
 #' @param files dataframe of videos
-#' @param out_dir directory to save frames to
-#' @param out_file file to which results will be saved
-#' @param fps frames per second, otherwise determine mathematically
 #' @param frames number of frames to sample
+#' @param fps frames per second, otherwise determine mathematically
+#' @param out_dir directory to save frames to if not null
+#' @param out_file csv file to which results will be saved
 #' @param file_col string value indexing which column contains file paths
 #' @param parallel Toggle for parallel processing, defaults to FALSE
 #' @param num_workers number of processors to use if parallel, defaults to 1
-#' @param checkpoint if not parallel, checkpoint ever n files, defaults to 1000
 #'
 #' @return dataframe of still frames for each video
 #' @export
@@ -19,13 +16,28 @@
 #' \dontrun{
 #' frames <- extract_frames(manifest, out_dir = "C:\\Users\\usr\\Videos\\", frames = 5)
 #' }
-extract_frames <- function(files, out_dir = tempfile(), out_file = NULL,
-                           fps = NULL, frames = NULL, file_col="filepath",
-                           parallel = FALSE, num_workers = 1, checkpoint = 1000) {
+extract_frames <- function(files, frames=5, fps = NULL, out_dir = NULL, out_file = NULL,
+                           file_col="filepath", parallel = FALSE, num_workers = 1){
   if (!is.null(fps)){ fps <- as.integer(fps) }
   if (!is.null(frames)){ frames <- as.integer(frames) }
   animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$extract_frames(files, out_dir, out_file=out_file, fps=fps, frames=frames,
-                          file_col=file_col, parallel=parallel, num_workers=as.integer(num_workers),
-                          checkpoint=as.integer(checkpoint))
+  animl_py$extract_frames(files, frames=frames, fps=fps, out_dir, out_file=out_file,
+                          file_col=file_col, parallel=parallel, num_workers=as.integer(num_workers))
 }
+
+
+#' Given a video path, return a specific frame as an RGB image
+#'
+#' @param video_path path to video file
+#' @param frame frame number to extract (default is 0)
+#'
+#' @returns rgb_frame: extracted frame as RGB image
+#' @export
+#'
+#' @examples
+#' \dontrun{get_frame_as_image('/example/path/to/video.mp4', frame=213)}
+get_frame_as_image <- function(video_path, frame=0){
+  animl_py <- get("animl_py", envir = parent.env(environment()))
+  animl_py$get_frame_as_image(video_path, frame=frame)
+}
\ No newline at end of file
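
For reference, the two video helpers side by side under the new extract_frames() signature; the paths are hypothetical:

frames <- extract_frames(videos, frames = 3, out_dir = "Frames/", out_file = "ImageFrames.csv")
img    <- get_frame_as_image("clip.mp4", frame = 42)   # one frame back as an RGB image
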
diff --git a/R/visualization.R b/R/visualization.R
new file mode 100644
index 0000000..aac81b0
--- /dev/null
+++ b/R/visualization.R
@@ -0,0 +1,70 @@
+#' Plot bounding boxes on image from md results
+#'
+#' @param rows row or rows of images in which the bounding box will be plotted
+#' @param file_col Column name containing file paths
+#' @param min_conf minimum confidence to plot box
+#' @param label_col Column name containing class to print above the box. If None, no label is printed.
+#' @param show_confidence If true, show confidence score above the box.
+#' @param colors Named list mapping class labels to BGR color tuples for the bounding boxes.
+#' @param detector_labels Named list mapping detector categories to human-readable labels.
+#' @param return_img If true, return the image array with boxes overlaid, otherwise display it
+#'
+#' @return no return value, produces bounding box in plot panel
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' test_image <- classify(classifier_model, test_image, file_col='filepath')
+#' plot_box(test_image, file_col='filepath', min_conf = 0.5, label_col='prediction')
+#' }
+plot_box <- function(rows, file_col='filepath', min_conf = 0, label_col=NULL,
+                     show_confidence=FALSE, colors = NULL, detector_labels = NULL,
+                     return_img=FALSE) {
+  animl_py <- get("animl_py", envir = parent.env(environment()))
+  animl_py$plot_box(rows, file_col=file_col, min_conf=min_conf, label_col=label_col,
+                    show_confidence=show_confidence, colors=colors, detector_labels=detector_labels,
+                    return_img=return_img)
+}
+
+
+#' Plot all bounding boxes in a manifest
+#'
+#' @param manifest manifest of detections
+#' @param out_dir Name of the output directory
+#' @param file_col Column name containing file paths
+#' @param min_conf Confidence threshold to plot the box
+#' @param label_col Column name containing class to print above the box. If None, no label is printed.
+#' @param show_confidence If true, show confidence score above the box.
+#' @param colors Named list mapping class labels to BGR color tuples for the bounding boxes.
+#' @param detector_labels Named list mapping detector categories to human-readable labels.
+#'
+#' @return None
+#' @export
+#'
+#' @examples
+#' \dontrun{plot_all_bounding_boxes(manifest, 'Plots/')}
+plot_all_bounding_boxes <- function(manifest, out_dir, file_col='filepath', min_conf=0.1,
+                                    label_col=FALSE, show_confidence = FALSE,
+                                    colors = NULL, detector_labels = NULL){
+  animl_py <- get("animl_py", envir = parent.env(environment()))
+  animl_py$plot_all_bounding_boxes(manifest, out_dir, file_col=file_col, min_conf=min_conf,
+                                   label_col=label_col, show_confidence=show_confidence,
+                                   colors=colors, detector_labels=detector_labels)
+}
+
+
+#' Read a CSV manifest file and perform box plotting on the images.
+#'
+#' @param csv_file Path to the CSV file.
+#' @param output_dir Saved location of boxed images output dir.
+#'
+#' @return None
+#' @export
+#'
+#' @examples
+#' \dontrun{plot_from_file('manifest.csv', 'Plots/')}
+plot_from_file <- function(csv_file, output_dir){
+  animl_py <- get("animl_py", envir = parent.env(environment()))
+  animl_py$plot_from_file(csv_file, output_dir)
+}
\ No newline at end of file
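
For reference, the label columns that replace the old prediction flag; the column and directory names are hypothetical:

plot_box(manifest[1, ], file_col = "filepath", min_conf = 0.5,
         label_col = "prediction", show_confidence = TRUE)
plot_all_bounding_boxes(manifest, "Plots/", label_col = "prediction")
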
diff --git a/examples/Workflow_reid.R b/examples/Workflow_reid.R
new file mode 100644
index 0000000..390f751
--- /dev/null
+++ b/examples/Workflow_reid.R
@@ -0,0 +1,87 @@
+# animl Classification Workflow
+#
+# c 2021 Mathias Tobler
+# Maintained by Kyra Swanson
+#
+#
+#-------------------------------------------------------------------------------
+# Setup
+#-------------------------------------------------------------------------------
+library(animl)
+library(reticulate)
+use_condaenv("animl-gpu")
+
+imagedir <- "/home/kyra/animl-py/examples/Southwest/"
+
+# create global variable file and directory names
+WorkingDirectory(imagedir, globalenv())
+
+# Build file manifest for all images and videos within base directory
+files <- build_file_manifest(imagedir, out_file=filemanifest, exif=TRUE)
+
+#===============================================================================
+# Add Project-Specific Info
+#===============================================================================
+
+# Get Station
+basedepth=length(strsplit(imagedir,split="/")[[1]])
+files$Station <- sapply(files$FilePath, function(x) strsplit(x,"/")[[1]][basedepth])
+
+# Process videos, extract frames for ID
+allframes <- extract_frames(files, out_dir = vidfdir, out_file=imageframes,
+                            frames=1, parallel=F, num_workers=parallel::detectCores())
+
+#===============================================================================
+# MegaDetector
+#===============================================================================
+# Most functions assume MegaDetector version 5. If using an earlier version of
+# MD, specify detectObjectBatch with argument 'mdversion'.
+
+# PyTorch Via Animl-Py
+md_py <- load_detector("/home/kyra/animl-py/models/md_v5a.0.0.pt", model_type='mdv5')
+
+mdraw <- detect(md_py, allframes, 1280, 1280)
+mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections)
+
+#mdresults <- read.csv(detections)
+#mdresults$Station <- sapply(mdresults$FilePath, function(x) strsplit(x,"/")[[1]][5])
+# select animal crops for classification
+animals <- get_animals(mdresults)
+empty <- get_empty(mdresults)
+
+#===============================================================================
+# Species Classifier
+#===============================================================================
+
+classes <- read.csv('~/models/sdzwa_southwest_v3_classes.csv')
+class_list <- classes$Code
+southwest <- load_classifier('/home/kyra/models/sdzwa_southwest_v3.pt', length(class_list))
+
+# get likelihoods
+pred_raw <- classify(southwest, animals, out_file=predictions)
+
+# Single Classification
+animals <- single_classification(animals, pred_raw, class_list)
+manifest <- rbind(animals, empty)
+
+# Sequence Classification
+manifest <- sequence_classification(animals, empty=empty, pred_raw, classes=class_list, "Station", emptyclass="empty")
+
+
+#===============================================================================
+# Symlinks
+#===============================================================================
+
+# symlink species predictions
+alldata <- sort_species(manifest, linkdir)
+write.csv(alldata, results)
+
+# symlink MD detections only
+sort_MD(manifest, linkdir)
+
+#===============================================================================
+# REID
+#===============================================================================
+miew <- load_miew("/home/kyra/models/miewid_v3.bin")
+embeddings <- extract_miew_embeddings(files, miew)
+
diff --git a/man/check_file.Rd b/man/check_file.Rd
index b962cbd..d0c088e 100644
--- a/man/check_file.Rd
+++ b/man/check_file.Rd
@@ -4,10 +4,12 @@
 \alias{check_file}
 \title{Check for files existence and prompt user if they want to load}
 \usage{
-check_file(file)
+check_file(file, output_type)
 }
 \arguments{
 \item{file}{the full path of the file to check}
+
+\item{output_type}{str to specify file name in prompt description}
 }
 \value{
 a boolean indicating whether a file was found
diff --git a/man/classify.Rd b/man/classify.Rd
index 03720dd..c21429e 100644
--- a/man/classify.Rd
+++ b/man/classify.Rd
@@ -7,15 +7,15 @@
 classify(
   model,
   detections,
-  device = NULL,
-  out_file = NULL,
-  file_col = "frame",
-  crop = TRUE,
-  normalize = TRUE,
   resize_width = 480,
   resize_height = 480,
+  file_col = "filepath",
+  crop = TRUE,
+  normalize = TRUE,
   batch_size = 1,
-  num_workers = 1
+  num_workers = 1,
+  device = NULL,
+  out_file = NULL
 )
 }
 \arguments{
@@ -23,9 +23,9 @@ classify(
 \item{detections}{manifest of animal detections}

-\item{device}{send model to the specified device}
+\item{resize_width}{image width input size}

-\item{out_file}{path to csv to save results to}
+\item{resize_height}{image height input size}

 \item{file_col}{column in manifest containing file paths}

 \item{crop}{use bbox to crop images before feeding into model}

 \item{normalize}{normalize the tensor before inference}

-\item{resize_width}{image width input size}
-
-\item{resize_height}{image height input size}
-
 \item{batch_size}{batch size for generator}

 \item{num_workers}{number of processes}
+
+\item{device}{send model to the specified device}
+
+\item{out_file}{path to csv to save results to}
 }
 \value{
 detection manifest with added prediction and confidence columns
diff --git a/man/detect.Rd b/man/detect.Rd
index d0171f9..47b3972 100644
--- a/man/detect.Rd
+++ b/man/detect.Rd
@@ -11,7 +11,7 @@ detect(
   resize_height,
   letterbox = TRUE,
   confidence_threshold = 0.1,
-  file_col = "frame",
+  file_col = "filepath",
   batch_size = 1,
   num_workers = 1,
   device = NULL,
diff --git a/man/export_timelapse.Rd b/man/export_timelapse.Rd
index 669793e..afc89d5 100644
--- a/man/export_timelapse.Rd
+++ b/man/export_timelapse.Rd
@@ -4,14 +4,12 @@
 \alias{export_timelapse}
 \title{Converts the Manifests to a csv file that contains columns needed for TimeLapse conversion in later step}
 \usage{
-export_timelapse(animals, empty, imagedir, only_animal = TRUE)
+export_timelapse(results, image_dir, only_animal = TRUE)
 }
 \arguments{
-\item{animals}{a DataFrame that has entries of anuimal classification}
+\item{results}{a DataFrame that has entries of animal classification}

-\item{empty}{a DataFrame that has detection of non-animal objects in images}
-
-\item{imagedir}{location of root directory where all images are stored (can contain subdirectories)}
+\item{image_dir}{location of root directory where all images are stored (can contain subdirectories)}

 \item{only_animal}{A bool that confirms whether we want only animal detections or all}
 }
diff --git a/man/extract_frames.Rd b/man/extract_frames.Rd
index cabad31..e42210e 100644
--- a/man/extract_frames.Rd
+++ b/man/extract_frames.Rd
@@ -6,40 +6,37 @@
 \usage{
 extract_frames(
   files,
-  out_dir = tempfile(),
-  out_file = NULL,
+  frames = 5,
   fps = NULL,
-  frames = NULL,
+  out_dir = NULL,
+  out_file = NULL,
   file_col = "filepath",
   parallel = FALSE,
-  num_workers = 1,
-  checkpoint = 1000
+  num_workers = 1
 )
 }
 \arguments{
 \item{files}{dataframe of videos}

-\item{out_dir}{directory to save frames to}
-
-\item{out_file}{file to which results will be saved}
+\item{frames}{number of frames to sample}

 \item{fps}{frames per second, otherwise determine mathematically}

-\item{frames}{number of frames to sample}
+\item{out_dir}{directory to save frames to if not null}
+
+\item{out_file}{csv file to which results will be saved}

 \item{file_col}{string value indexing which column contains file paths}

 \item{parallel}{Toggle for parallel processing, defaults to FALSE}

 \item{num_workers}{number of processors to use if parallel, defaults to 1}
-
-\item{checkpoint}{if not parallel, checkpoint ever n files, defaults to 1000}
 }
 \value{
 dataframe of still frames for each video
 }
 \description{
-This function can take
+Extract frames from video for classification
 }
 \examples{
 \dontrun{
diff --git a/man/get_frame_as_image.Rd b/man/get_frame_as_image.Rd
new file mode 100644
index 0000000..0fedd24
--- /dev/null
+++ b/man/get_frame_as_image.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/video_processing.R
+\name{get_frame_as_image}
+\alias{get_frame_as_image}
+\title{Given a video path, return a specific frame as an RGB image}
+\usage{
+get_frame_as_image(video_path, frame = 0)
+}
+\arguments{
+\item{video_path}{path to video file}
+
+\item{frame}{frame number to extract (default is 0)}
+}
+\value{
+rgb_frame: extracted frame as RGB image
+}
+\description{
+Given a video path, return a specific frame as an RGB image
+}
+\examples{
+\dontrun{get_frame_as_image('/example/path/to/video.mp4', frame=213)}
+}
diff --git a/man/load_classifier.Rd b/man/load_classifier.Rd
index fe23b85..3d0ac54 100644
--- a/man/load_classifier.Rd
+++ b/man/load_classifier.Rd
@@ -4,12 +4,12 @@
 \alias{load_classifier}
 \title{Load a Classifier Model with animl-py}
 \usage{
-load_classifier(model_path, len_classes, device = NULL, architecture = "CTL") +load_classifier(model_path, classes, device = NULL, architecture = "CTL") } \arguments{ \item{model_path}{path to model} -\item{len_classes}{path to class list} +\item{classes}{path to class list or loaded class list} \item{device}{send model to the specified device} diff --git a/man/parse_detections.Rd b/man/parse_detections.Rd index 5449c4f..00f71fd 100644 --- a/man/parse_detections.Rd +++ b/man/parse_detections.Rd @@ -9,7 +9,7 @@ parse_detections( manifest = NULL, out_file = NULL, threshold = 0, - file_col = "frame" + file_col = "filepath" ) } \arguments{ diff --git a/man/plot_all_bounding_boxes.Rd b/man/plot_all_bounding_boxes.Rd index b37ac81..e1b2f69 100644 --- a/man/plot_all_bounding_boxes.Rd +++ b/man/plot_all_bounding_boxes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_boxes.R +% Please edit documentation in R/visualization.R \name{plot_all_bounding_boxes} \alias{plot_all_bounding_boxes} \title{Plot all bounding boxes in a manifest} @@ -7,9 +7,12 @@ plot_all_bounding_boxes( manifest, out_dir, - file_col = "frame", + file_col = "filepath", min_conf = 0.1, - prediction = FALSE + label_col = FALSE, + show_confidence = FALSE, + colors = NULL, + detector_labels = NULL ) } \arguments{ @@ -21,7 +24,13 @@ plot_all_bounding_boxes( \item{min_conf}{Confidence threshold to plot the box} -\item{prediction}{flag determining whether prediction be printed alongside bounding box} +\item{label_col}{Column name containing class to print above the box. If None, no label is printed.} + +\item{show_confidence}{If true, show confidence score above the box.} + +\item{colors}{Named list mapping class labels to BGR color tuples for the bounding boxes.} + +\item{detector_labels}{Named list mapping detector categories to human-readable labels.} } \value{ None diff --git a/man/plot_box.Rd b/man/plot_box.Rd index a9c6e23..4e1a21a 100644 --- a/man/plot_box.Rd +++ b/man/plot_box.Rd @@ -1,10 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_boxes.R +% Please edit documentation in R/visualization.R \name{plot_box} \alias{plot_box} \title{Plot bounding boxes on image from md results} \usage{ -plot_box(rows, file_col = "filepath", min_conf = 0, prediction = FALSE) +plot_box( + rows, + file_col = "filepath", + min_conf = 0, + label_col = NULL, + show_confidence = FALSE, + colors = NULL, + detector_labels = NULL, + return_img = FALSE +) } \arguments{ \item{rows}{row or rows of images in which the bounding box will be plotted} @@ -13,7 +22,15 @@ plot_box(rows, file_col = "filepath", min_conf = 0, prediction = FALSE) \item{min_conf}{minimum confidence to plot box} -\item{prediction}{If True, display the prediction label alongside the bounding box.} +\item{label_col}{Column name containing class to print above the box. 
If None, no label is printed.}
+
+\item{show_confidence}{If true, show confidence score above the box.}
+
+\item{colors}{Named list mapping class labels to BGR color tuples for the bounding boxes.}
+
+\item{detector_labels}{Named list mapping detector categories to human-readable labels.}
+
+\item{return_img}{If true, return the image array with boxes overlaid, otherwise display it}
 }
 \value{
 no return value, produces bounding box in plot panel
diff --git a/man/plot_from_file.Rd b/man/plot_from_file.Rd
index a4ae225..c80973d 100644
--- a/man/plot_from_file.Rd
+++ b/man/plot_from_file.Rd
@@ -1,15 +1,17 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_boxes.R
+% Please edit documentation in R/visualization.R
 \name{plot_from_file}
 \alias{plot_from_file}
 \title{Read a CSV manifest file and perform box plotting on the images.}
 \usage{
-plot_from_file(csv_file, output_dir)
+plot_from_file(csv_file, out_dir, file_col = "filepath")
 }
 \arguments{
 \item{csv_file}{Path to the CSV file.}

-\item{output_dir}{Saved location of boxed images output dir.}
+\item{out_dir}{Saved location of boxed images output dir.}
+
+\item{file_col}{Column name containing file paths.}
 }
 \value{
 None
diff --git a/man/update_animl_py.Rd b/man/update_animl_py.Rd
new file mode 100644
index 0000000..c4a4743
--- /dev/null
+++ b/man/update_animl_py.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/install.R
+\name{update_animl_py}
+\alias{update_animl_py}
+\title{Update animl-py version}
+\usage{
+update_animl_py(py_env = "animl_env", animl_version = ANIML_VERSION)
+}
+\arguments{
+\item{py_env}{name of python environment}
+
+\item{animl_version}{version of animl to install}
+}
+\value{
+None
+}
+\description{
+Update animl-py version
+}
+\examples{
+\dontrun{update_animl_py(py_env = "animl_env", animl_version = ANIML_VERSION)}
+}

From e36d2e961399874b3d56aed9278fc26fcf42ef4c Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Thu, 27 Nov 2025 13:23:05 -0800
Subject: [PATCH 03/12] check python version, update if mismatch

---
 R/install.R | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/R/install.R b/R/install.R
index 237e292..1a9be7c 100644
--- a/R/install.R
+++ b/R/install.R
@@ -44,7 +44,6 @@ animl_install <- function(py_env = "animl_env",
   }
   # conda env exists
   else{
     # check animl version
     update_animl_py()
-    return(TRUE)
   }
 }
@@ -87,6 +86,16 @@ update_animl_py <- function(py_env = "animl_env",
     packageStartupMessage("animl-py version could not be read, reinstalling...")
     reticulate::py_install(sprintf("animl==%s", animl_version), envname=py_env, pip=TRUE)
   }
+  else{
+    r_version <- strsplit(animl_version, ".", fixed = TRUE)[[1]]
+    py_version <- strsplit(version_error, ".", fixed = TRUE)[[1]]
+
+    # reinstall if the R and python package versions disagree
+    if (!identical(r_version, py_version)){
+      packageStartupMessage("animl-py version mismatch, reinstalling...")
+      reticulate::py_install(sprintf("animl==%s", animl_version), envname=py_env, pip=TRUE)
+    }
+  }
 }
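
For reference, the component-wise check PATCH 03 relies on; the version strings are illustrative:

identical(strsplit("3.1.0", ".", fixed = TRUE)[[1]],
          strsplit("3.1.0", ".", fixed = TRUE)[[1]])   # TRUE, versions agree
identical(strsplit("3.1.0", ".", fixed = TRUE)[[1]],
          strsplit("3.0.1", ".", fixed = TRUE)[[1]])   # FALSE, triggers reinstall
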
"C:\\Users\\Kyra\\animl\\examples\\Southwest" +imagedir <- "\\examples\\Southwest" #create global variable file and directory namesfrom animl import file_management WorkingDirectory(imagedir, globalenv()) @@ -41,7 +41,7 @@ allframes <- extract_frames(files, out_dir = vidfdir, out_file=imageframes, # MD, specify detectObjectBatch with argument 'mdversion'. # PyTorch Via Animl-Py -md_py <- load_detector("C:\\Users\\Kyra\\animl-py\\models\\md_v5a.0.0.pt", model_type = 'mdv5') +md_py <- load_detector("~/models/md_v5a.0.0.pt", model_type = 'mdv5') mdraw <- detect(md_py, allframes, 1280, 1280, batch_size=4) mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections) @@ -56,9 +56,9 @@ empty <- get_empty(mdresults) # Species Classifier #=============================================================================== -classes <- load_class_list('C:\\Users\\Kyra\\animl-py\\models\\sdzwa_southwest_v3_classes.csv') +classes <- load_class_list('~/models/sdzwa_southwest_v3_classes.csv') class_list <- classes$class -southwest <- load_classifier('C:\\Users\\Kyra\\animl-py\\models\\sdzwa_southwest_v3.pt', length(class_list)) +southwest <- load_classifier('~/models/sdzwa_southwest_v3.pt', length(class_list)) # get likelihoods pred_raw <- classify(southwest, animals, resize_width=299, resize_height=299, out_file=predictions, batch_size=4) diff --git a/examples/Workflow_reid.R b/examples/Workflow_reid.R index 390f751..171785a 100644 --- a/examples/Workflow_reid.R +++ b/examples/Workflow_reid.R @@ -11,7 +11,7 @@ library(animl) library(reticulate) use_condaenv("animl-gpu") -imagedir <- "/home/kyra/animl-py/examples/Southwest/" +imagedir <- "/examples/Southwest/" #create global variable file and directory namesfrom animl import file_management WorkingDirectory(imagedir, globalenv()) @@ -38,7 +38,7 @@ allframes <- extract_frames(files, out_dir = vidfdir, out_file=imageframes, # MD, specify detectObjectBatch with argument 'mdversion'. 
diff --git a/examples/Workflow_reid.R b/examples/Workflow_reid.R
index 390f751..171785a 100644
--- a/examples/Workflow_reid.R
+++ b/examples/Workflow_reid.R
@@ -11,7 +11,7 @@
 library(animl)
 library(reticulate)
 use_condaenv("animl-gpu")

-imagedir <- "/home/kyra/animl-py/examples/Southwest/"
+imagedir <- "/examples/Southwest/"

 # create global variable file and directory names
 WorkingDirectory(imagedir, globalenv())
@@ -38,7 +38,7 @@ allframes <- extract_frames(files, out_dir = vidfdir, out_file=imageframes,
 # MD, specify detectObjectBatch with argument 'mdversion'.

 # PyTorch Via Animl-Py
-md_py <- load_detector("/home/kyra/animl-py/models/md_v5a.0.0.pt", model_type='mdv5')
+md_py <- load_detector("~/models/md_v5a.0.0.pt", model_type='mdv5')

 mdraw <- detect(md_py, allframes, 1280, 1280)
 mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections)
@@ -55,7 +55,7 @@ empty <- get_empty(mdresults)

 classes <- read.csv('~/models/sdzwa_southwest_v3_classes.csv')
 class_list <- classes$Code
-southwest <- load_classifier('/home/kyra/models/sdzwa_southwest_v3.pt', length(class_list))
+southwest <- load_classifier('~/models/sdzwa_southwest_v3.pt', length(class_list))

 # get likelihoods
 pred_raw <- classify(southwest, animals, out_file=predictions)
@@ -82,6 +82,6 @@ sort_MD(manifest, linkdir)
 #===============================================================================
 # REID
 #===============================================================================
-miew <- load_miew("/home/kyra/models/miewid_v3.bin")
+miew <- load_miew("~/models/miewid_v3.bin")
 embeddings <- extract_miew_embeddings(files, miew)

From 6836fc86877eb2b2b05ebb8f515338f7a3426a4f Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Thu, 25 Dec 2025 10:22:53 -0800
Subject: [PATCH 05/12] update for 3.1.1

---
 NAMESPACE                         |  3 +++
 R/classification.R                | 15 +++++++------
 R/detection.R                     | 10 ++++-----
 R/export.R                        | 35 ++++++++++++++++++++-------
 R/file_management.R               | 35 +++++++++++++++++++++++++++
 R/install.R                       |  5 +----
 R/reid.R                          |  4 ++--
 R/video_processing.R              |  6 +++---
 R/visualization.R                 |  8 +++----
 man/export_coco.Rd                | 30 ++++++++++++++++++++++++
 man/export_megadetector.Rd        |  5 ++++-
 man/extract_frames.Rd             |  6 +++---
 man/load_classifier.Rd            |  6 +++---
 man/load_detector.Rd              |  4 ++--
 man/load_json.Rd                  | 22 +++++++++++++++++++
 man/load_miew.Rd                  |  2 +-
 man/parse_detections.Rd           |  8 +++----
 man/save_json.Rd                  | 26 +++++++++++++++++++++
 man/update_labels_from_folders.Rd |  6 +++---
 19 files changed, 188 insertions(+), 48 deletions(-)
 create mode 100644 man/export_coco.Rd
 create mode 100644 man/load_json.Rd
 create mode 100644 man/save_json.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 12df11f..792ca69 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,6 +13,7 @@ export(delete_pyenv)
 export(detect)
 export(download_model)
 export(euclidean_squared_distance)
+export(export_coco)
 export(export_folders)
 export(export_megadetector)
 export(export_timelapse)
@@ -27,6 +28,7 @@ export(load_class_list)
 export(load_classifier)
 export(load_data)
 export(load_detector)
+export(load_json)
 export(load_miew)
 export(parse_detections)
 export(plot_all_bounding_boxes)
@@ -35,6 +37,7 @@ export(plot_box)
 export(plot_from_file)
 export(remove_diagonal)
 export(remove_link)
 export(save_classifier)
+export(save_json)
 export(sequence_classification)
 export(single_classification)
diff --git a/R/classification.R b/R/classification.R
index 860e72c..328f09e 100644
--- a/R/classification.R
+++ b/R/classification.R
@@ -1,11 +1,11 @@
-#' Load a Classifier Model with animl-py
+#' Load a Classifier Model and Class_list
 #'
 #' @param model_path path to model
 #' @param classes path to class list or loaded class list
 #' @param device send model to the specified device
 #' @param architecture model architecture
 #'
-#' @return classifier model
+#' @return classifier model, class list
 #' @export
 #'
 #' @examples
@@ -77,10 +77,13 @@ load_class_list <- function(classlist_file){
                      device=NULL, out_file=NULL){

   animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$classify(model, detections, device=device, out_file=out_file,
-                    file_col=file_col, crop=crop, normalize=normalize,
-
                    resize_width=as.integer(resize_width), resize_height=as.integer(resize_height),
-                    batch_size=as.integer(batch_size), num_workers=as.integer(num_workers))
+  animl_py$classify(model, detections,
+                    resize_width=as.integer(resize_width),
+                    resize_height=as.integer(resize_height),
+                    file_col=file_col, crop=crop, normalize=normalize,
+                    batch_size=as.integer(batch_size),
+                    num_workers=as.integer(num_workers),
+                    device=device, out_file=out_file)
 }
diff --git a/R/detection.R b/R/detection.R
index 729b85d..02e19fc 100644
--- a/R/detection.R
+++ b/R/detection.R
@@ -1,10 +1,10 @@
 #' Load an Object Detector
 #'
 #' @param model_path path to detector model file
-#' @param model_type type of model expected ie "MDV5", "MDV6", "YOLO"
+#' @param model_type type of model expected ie "MDV5", "MDV6", "YOLO", "ONNX"
 #' @param device specify to run on cpu or gpu
 #'
-#' @return megadetector object
+#' @return detector object
 #' @export
 #'
 #' @examples
@@ -51,11 +51,11 @@ detect <- function(detector, image_file_names, resize_width, resize_height,
 }

-#' parse MD results into a simple dataframe
+#' Parse MD results into a simple dataframe
 #'
 #' @param results json output from megadetector
-#' @param manifest dataframe containing all frames
-#' @param out_file path to save dataframe
+#' @param manifest optional dataframe containing all frames
+#' @param out_file optional path to save dataframe
 #' @param threshold confidence threshold to include bbox
 #' @param file_col column in manifest that refers to file paths
 #'
diff --git a/R/export.R b/R/export.R
index 91ba886..f779a79 100644
--- a/R/export.R
+++ b/R/export.R
@@ -48,7 +48,7 @@ remove_link <- function(manifest, link_col='link'){
 #' Update Results from File Browser
 #'
 #' @param manifest dataframe containing file data and predictions
-#' @param link_dir directory to sort files into
+#' @param export_dir directory to sort files into
 #' @param unique_name column name indicating a unique file name for each row
 #'
 #' @return dataframe with new "Species" column that contains the verified species
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' results <- update_labels_from_folders(manifest, link_dir)
+#' results <- update_labels_from_folders(manifest, export_dir)
 #' }
-update_labels_from_folders <- function(manifest, link_dir, unique_name='uniquename'){
-  if (!dir.exists(link_dir)) {stop("The given directory does not exist.")}
+update_labels_from_folders <- function(manifest, export_dir, unique_name='uniquename'){
+  if (!dir.exists(export_dir)) {stop("The given directory does not exist.")}
   if (!unique_name %in% names(manifest)) {stop("Manifest does not have unique names, cannot match to sorted directories.")}

-  FilePath <- list.files(link_dir, recursive = TRUE, include.dirs = TRUE)
+  FilePath <- list.files(export_dir, recursive = TRUE, include.dirs = TRUE)
   files <- data.frame(FilePath)
   files[unique_name] <- sapply(files$FilePath,function(x)strsplit(x,"/")[[1]][2])
@@ -73,20 +73,41 @@
 }

+#' Export a manifest to COCO format. 
+#' +#' @param manifest dataframe containing images and associated predictions +#' @param class_list dataframe containing class names and their corresponding IDs +#' @param out_file path to save the COCO formatted file +#' @param info optional info section of COCO file +#' @param licenses optional licenses section of COCO file +#' +#' @returns coco formatted json file saved to out_file +#' @export +#' +#' @examples +#' \dontrun{ +#' export_coco(manifest, classes, "path/to/out.json") +#' } +export_coco <- function(manifest, class_list, out_file, info=NULL, licenses=NULL){ + animl_py <- get("animl_py", envir = parent.env(environment())) + animl_py$export_coco(manifest, class_list, out_file, info=info, licenses=licenses) +} + #' Converts the .csv file to the MD-formatted .json file. #' #' @param manifest dataframe containing images and associated detections #' @param output_file path to save the MD formatted file #' @param detector name of the detector model used +#' @param prompt whether to prompt before overwriting existing file #' #' @return None #' @export #' #' @examples #' \dontrun{export_megadetector(manifest, output_file= 'results.json', detector='MDv6')} -export_megadetector <- function(manifest, output_file=NULL, detector='MegaDetector v5a'){ +export_megadetector <- function(manifest, output_file=NULL, detector='MegaDetector v5a', prompt=TRUE){ animl_py <- get("animl_py", envir = parent.env(environment())) - animl_py$export_megadetector(manifest, output_file=output_file, detector=detector) + animl_py$export_megadetector(manifest, output_file=output_file, detector=detector, prompt=prompt) } diff --git a/R/file_management.R b/R/file_management.R index d53f4a7..4dc5e94 100644 --- a/R/file_management.R +++ b/R/file_management.R @@ -132,6 +132,41 @@ check_file <- function(file, output_type) { } +#' Save data to a JSON file. +#' +#' @param data the dictionary to be saved +#' @param out_file full path to save file to +#' @param prompt prompt user to confirm overwrite +#' +#' @returns None +#' @export +#' +#' @examples +#' \dontrun{ +#' save_json(mdresults, 'mdraw.json') +#' } +save_json <- function(data, out_file, prompt=TRUE){ + animl_py <- get("animl_py", envir = parent.env(environment())) + animl_py$save_json(data, out_file, prompt=prompt) +} + + +#' Load data from a JSON file. +#' +#' @param file the full path of the file to load +#' +#' @returns loaded json file +#' @export +#' +#' @examples +#' \dontrun{ +#' mdraw <- load_json('mdraw.json') +#' } +load_json <- function(file){ + animl_py <- get("animl_py", envir = parent.env(environment())) + animl_py$load_json(file) +} + #' Download specified model to the given directory. 
#'
diff --git a/R/install.R b/R/install.R
index 1a9be7c..d48de76 100644
--- a/R/install.R
+++ b/R/install.R
@@ -87,11 +87,8 @@ update_animl_py <- function(py_env = "animl_env",
     reticulate::py_install(sprintf("animl==%s", animl_version), envname=py_env, pip=TRUE)
   }
   else{
-    r_version <- strsplit(animl_version, ".", fixed = TRUE)[[1]]
-    py_version <- strsplit(version_error, ".", fixed = TRUE)[[1]]
-
-    # reinstall if the R and python package versions disagree
-    if (!identical(r_version, py_version)){
+    if (ANIML_VERSION != version_error){
       packageStartupMessage("animl-py version mismatch, reinstalling...")
       reticulate::py_install(sprintf("animl==%s", animl_version), envname=py_env, pip=TRUE)
     }
   }
diff --git a/R/reid.R b/R/reid.R
index 88da1d5..f995b1f 100644
--- a/R/reid.R
+++ b/R/reid.R
@@ -1,8 +1,8 @@
 #' Load MiewID model
 #'
 #' @param file_path path to model weights
-#' @param device toggle cpu or gpu
-#'
+#' @param device device to load model to
+
 #' @returns miewid model
 #' @export
 #'
diff --git a/R/video_processing.R b/R/video_processing.R
index 583fb08..188cfea 100644
--- a/R/video_processing.R
+++ b/R/video_processing.R
@@ -3,8 +3,8 @@
 #' @param files dataframe of videos
 #' @param frames number of frames to sample
 #' @param fps frames per second, otherwise determine mathematically
-#' @param out_dir directory to save frames to if not null
 #' @param out_file csv file to which results will be saved
+#' @param out_dir directory to save frames to if not null
 #' @param file_col string value indexing which column contains file paths
 #' @param parallel Toggle for parallel processing, defaults to FALSE
 #' @param num_workers number of processors to use if parallel, defaults to 1
@@ -16,12 +16,12 @@
 #' \dontrun{
 #' frames <- extract_frames(manifest, out_dir = "C:\\Users\\usr\\Videos\\", frames = 5)
 #' }
-extract_frames <- function(files, frames=5, fps = NULL, out_dir = NULL, out_file = NULL,
+extract_frames <- function(files, frames=5, fps = NULL, out_file = NULL, out_dir = NULL,
                            file_col="filepath", parallel = FALSE, num_workers = 1){
   if (!is.null(fps)){ fps <- as.integer(fps) }
   if (!is.null(frames)){ frames <- as.integer(frames) }
   animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$extract_frames(files, frames=frames, fps=fps, out_dir, out_file=out_file,
+  animl_py$extract_frames(files, frames=frames, fps=fps, out_file=out_file, out_dir=out_dir,
                           file_col=file_col, parallel=parallel, num_workers=as.integer(num_workers))
 }
diff --git a/R/visualization.R b/R/visualization.R
index aac81b0..5186ff3 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -17,8 +17,8 @@
 #' test_image <- classify(classifier_model, test_image, file_col='filepath')
 #' plot_box(test_image, file_col='filepath', min_conf = 0.5, label_col='prediction')
 #' }
-plot_box <- function(rows, file_col='filepath', min_conf = 0, label_col=NULL,
-                     show_confidence=FALSE, colors = NULL, detector_labels = NULL,
+plot_box <- function(rows, file_col='filepath', min_conf=0, label_col=NULL,
+                     show_confidence=FALSE, colors=NULL, detector_labels=NULL,
                      return_img=FALSE) {
   animl_py <- get("animl_py", envir = parent.env(environment()))
   animl_py$plot_box(rows, file_col=file_col, min_conf=min_conf, label_col=label_col,
                     show_confidence=show_confidence, colors=colors, detector_labels=detector_labels,
                     return_img=return_img)
 }
@@ -44,8 +44,8 @@
 #' @examples
 #' \dontrun{plot_all_bounding_boxes(manifest, 'Plots/')}
 plot_all_bounding_boxes <- function(manifest, out_dir, file_col='filepath', min_conf=0.1,
-                                    label_col=FALSE, show_confidence = FALSE,
-                                    colors = NULL, detector_labels = NULL){
+                                    label_col=NULL, 
show_confidence=FALSE, + colors=NULL, detector_labels=NULL){ animl_py <- get("animl_py", envir = parent.env(environment())) animl_py$plot_all_bounding_boxes(manifest, out_dir, file_col=file_col, min_conf=min_conf, label_col=label_col, show_confidence=show_confidence, diff --git a/man/export_coco.Rd b/man/export_coco.Rd new file mode 100644 index 0000000..a68609f --- /dev/null +++ b/man/export_coco.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/export.R +\name{export_coco} +\alias{export_coco} +\title{Export a manifest to COCO format.} +\usage{ +export_coco(manifest, class_list, out_file, info = NULL, licenses = NULL) +} +\arguments{ +\item{manifest}{dataframe containing images and associated predictions} + +\item{class_list}{dataframe containing class names and their corresponding IDs} + +\item{out_file}{path to save the COCO formatted file} + +\item{info}{optional info section of COCO file} + +\item{licenses}{optional licenses section of COCO file} +} +\value{ +coco formatted json file saved to out_file +} +\description{ +Export a manifest to COCO format. +} +\examples{ +\dontrun{ +export_coco(manifest, classes, "path/to/out.json") +} +} diff --git a/man/export_megadetector.Rd b/man/export_megadetector.Rd index abe2ecf..14b8271 100644 --- a/man/export_megadetector.Rd +++ b/man/export_megadetector.Rd @@ -7,7 +7,8 @@ export_megadetector( manifest, output_file = NULL, - detector = "MegaDetector v5a" + detector = "MegaDetector v5a", + prompt = TRUE ) } \arguments{ @@ -16,6 +17,8 @@ export_megadetector( \item{output_file}{path to save the MD formatted file} \item{detector}{name of the detector model used} + +\item{prompt}{whether to prompt before overwriting existing file} } \value{ None diff --git a/man/extract_frames.Rd b/man/extract_frames.Rd index e42210e..850511f 100644 --- a/man/extract_frames.Rd +++ b/man/extract_frames.Rd @@ -8,8 +8,8 @@ extract_frames( files, frames = 5, fps = NULL, - out_dir = NULL, out_file = NULL, + out_dir = NULL, file_col = "filepath", parallel = FALSE, num_workers = 1 @@ -22,10 +22,10 @@ extract_frames( \item{fps}{frames per second, otherwise determine mathematically} -\item{out_dir}{directory to save frames to if not null} - \item{out_file}{csv file to which results will be saved} +\item{out_dir}{directory to save frames to if not null} + \item{file_col}{string value indexing which column contains file paths} \item{parallel}{Toggle for parallel processing, defaults to FALSE} diff --git a/man/load_classifier.Rd b/man/load_classifier.Rd index 3d0ac54..d701683 100644 --- a/man/load_classifier.Rd +++ b/man/load_classifier.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/classification.R \name{load_classifier} \alias{load_classifier} -\title{Load a Classifier Model with animl-py} +\title{Load a Classifier Model and Class_list} \usage{ load_classifier(model_path, classes, device = NULL, architecture = "CTL") } @@ -16,10 +16,10 @@ load_classifier(model_path, classes, device = NULL, architecture = "CTL") \item{architecture}{model architecture} } \value{ -classifier model +classifier model, class list } \description{ -Load a Classifier Model with animl-py +Load a Classifier Model and Class_list } \examples{ \dontrun{ diff --git a/man/load_detector.Rd b/man/load_detector.Rd index 69090e2..7b35ec2 100644 --- a/man/load_detector.Rd +++ b/man/load_detector.Rd @@ -9,12 +9,12 @@ load_detector(model_path, model_type, device = NULL) \arguments{ \item{model_path}{path to detector model file} -\item{model_type}{type of model 
expected ie "MDV5", "MDV6", "YOLO"}
+\item{model_type}{type of model expected, i.e. "MDV5", "MDV6", "YOLO", "ONNX"}
 
 \item{device}{specify to run on cpu or gpu}
 }
 \value{
-megadetector object
+detector object
 }
 \description{
 Load an Object Detector
diff --git a/man/load_json.Rd b/man/load_json.Rd
new file mode 100644
index 0000000..123ec3f
--- /dev/null
+++ b/man/load_json.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/file_management.R
+\name{load_json}
+\alias{load_json}
+\title{Load data from a JSON file.}
+\usage{
+load_json(file)
+}
+\arguments{
+\item{file}{the full path of the file to load}
+}
+\value{
+loaded json file
+}
+\description{
+Load data from a JSON file.
+}
+\examples{
+\dontrun{
+mdraw <- load_json('mdraw.json')
+}
+}
diff --git a/man/load_miew.Rd b/man/load_miew.Rd
index 51177af..9067b61 100644
--- a/man/load_miew.Rd
+++ b/man/load_miew.Rd
@@ -9,7 +9,7 @@ load_miew(file_path, device = NULL)
 \arguments{
 \item{file_path}{path to model weights}
 
-\item{device}{toggle cpu or gpu}
+\item{device}{device to load model to}
 }
 \value{
 miewid model
diff --git a/man/parse_detections.Rd b/man/parse_detections.Rd
index 00f71fd..97bcb62 100644
--- a/man/parse_detections.Rd
+++ b/man/parse_detections.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/detection.R
 \name{parse_detections}
 \alias{parse_detections}
-\title{parse MD results into a simple dataframe}
+\title{Parse MD results into a simple dataframe}
 \usage{
 parse_detections(
   results,
@@ -15,9 +15,9 @@
 \arguments{
 \item{results}{json output from megadetector}
 
-\item{manifest}{dataframe containing all frames}
+\item{manifest}{optional dataframe containing all frames}
 
-\item{out_file}{path to save dataframe}
+\item{out_file}{optional path to save dataframe}
 
 \item{threshold}{confidence threshold to include bbox}
 
@@ -27,7 +27,7 @@
 original dataframe including md results
 }
 \description{
-parse MD results into a simple dataframe
+Parse MD results into a simple dataframe
 }
 \examples{
 \dontrun{
diff --git a/man/save_json.Rd b/man/save_json.Rd
new file mode 100644
index 0000000..5ec0d39
--- /dev/null
+++ b/man/save_json.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/file_management.R
+\name{save_json}
+\alias{save_json}
+\title{Save data to a JSON file.}
+\usage{
+save_json(data, out_file, prompt = TRUE)
+}
+\arguments{
+\item{data}{the dictionary to be saved}
+
+\item{out_file}{full path to save file to}
+
+\item{prompt}{prompt user to confirm overwrite}
+}
+\value{
+None
+}
+\description{
+Save data to a JSON file.
+}
+\examples{
+\dontrun{
+save_json(mdresults, 'mdraw.json')
+}
+}
diff --git a/man/update_labels_from_folders.Rd b/man/update_labels_from_folders.Rd
index 5220322..d47133c 100644
--- a/man/update_labels_from_folders.Rd
+++ b/man/update_labels_from_folders.Rd
@@ -4,12 +4,12 @@
 \alias{update_labels_from_folders}
 \title{Update Results from File Browser}
 \usage{
-update_labels_from_folders(manifest, link_dir, unique_name = "uniquename")
+update_labels_from_folders(manifest, export_dir, unique_name = "uniquename")
 }
 \arguments{
 \item{manifest}{dataframe containing file data and predictions}
 
-\item{link_dir}{directory to sort files into}
+\item{export_dir}{directory to sort files into}
 
 \item{unique_name}{column name indicating a unique file name for each row}
 }
@@ -21,6 +21,6 @@ Update Results from File Browser
 }
 \examples{
 \dontrun{
-results <- update_labels_from_folders(manifest, link_dir)
+results <- update_labels_from_folders(manifest, export_dir)
 }
 }

From e270cb34b7d1317625d2dc2218ba480cf870d6f9 Mon Sep 17 00:00:00 2001
From: tkswanson
Date: Mon, 29 Dec 2025 08:16:08 -0800
Subject: [PATCH 06/12] merge home

---
 DESCRIPTION                  |  2 +-
 R/export.R                   | 65 +++++++++++++++++++++++++---
 R/file_management.R          | 14 +++---
 R/install.R                  | 50 ++++++++++-----------
 R/split.R                    | 21 +++++----
 R/video_processing.R         |  8 ++--
 R/zzz.R                      | 22 +++++++---
 README.md                    | 84 +++++++++++++++++++++---------------
 examples/Workflow_animl-py.R | 14 +++---
 man/export_camtrapR.Rd       | 45 +++++++++++++++++++
 man/export_coco.Rd           | 28 ++++++++++++
 man/export_megadetector.Rd   |  9 ++--
 man/train_val_test.Rd        | 22 ++++++----
 13 files changed, 274 insertions(+), 110 deletions(-)
 create mode 100644 man/export_camtrapR.Rd
 create mode 100644 man/export_coco.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 15795e2..b8edd1f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: animl
 Title: A Collection of ML Tools for Conservation Research
-Version: 3.0.1
+Version: 3.1.0
 Authors@R: c(person(given="Kyra", family="Swanson",email="tswanson@sdzwa.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1496-3217")), person(given="Mathias",family="Tobler",role = "aut"))
 Description: Functions required to classify subjects within camera trap field data. The package can handle both images and videos. The authors recommend a two-step approach using Microsoft's 'MegaDetector' model and then a second model trained on the classes of interest.
diff --git a/R/export.R b/R/export.R
index 91ba886..adc1219 100644
--- a/R/export.R
+++ b/R/export.R
@@ -73,20 +73,51 @@ update_labels_from_folders <- function(manifest, link_dir, unique_name='uniquena
 }
 
 
-#' Converts the .csv file to the MD-formatted .json file.
+#' Converts the .csv file to a COCO-formatted .json file.
 #'
 #' @param manifest dataframe containing images and associated detections
-#' @param output_file path to save the MD formatted file
-#' @param detector name of the detector model used
+#' @param class_list dataframe containing class names and their corresponding IDs
+#' @param out_file path to save the formatted file
+#' @param info info section of COCO file, named list
+#' @param licenses licenses section of COCO file, array
 #'
-#' @return None
+#' @return COCO formatted json
 #' @export
 #'
 #' @examples
-#' \dontrun{export_megadetector(manifest, output_file= 'results.json', detector='MDv6')}
+#' \dontrun{export_coco(manifest, class_list, 'results.json')}
-export_megadetector <- function(manifest, output_file=NULL, detector='MegaDetector v5a'){
+export_coco <- function(manifest, class_list, out_file, info=NULL, licenses=NULL){
   animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$export_megadetector(manifest, output_file=output_file, detector=detector)
+  animl_py$export_coco(manifest, class_list, out_file, info=info, licenses=licenses)
 }
+
+
+#' Export data into sorted folders organized by station
+#'
+#' @param manifest dataframe containing images and associated predictions
+#' @param out_dir directory to export sorted images
+#' @param out_file if provided, save the manifest to this file
+#' @param label_col column containing species labels
+#' @param file_col column containing source paths
+#' @param station_col column containing station names
+#' @param unique_name column containing unique file name
+#' @param copy if TRUE, copy files instead of linking them
+#'
+#' @returns manifest with link column
+#' @export
+#'
+#' @examples
+#' \dontrun{manifest <- export_camtrapR(manifest, out_dir, out_file=NULL, label_col='prediction',
+#'                                      file_col="filepath", station_col='station',
+#'                                      unique_name='uniquename', copy=FALSE)}
+export_camtrapR <- function(manifest, out_dir, out_file=NULL, label_col='prediction',
+                            file_col="filepath", station_col='station',
+                            unique_name='uniquename', copy=FALSE){
+  animl_py <- get("animl_py", envir = parent.env(environment()))
+  animl_py$export_camtrapR(manifest, out_dir, out_file=out_file, label_col=label_col,
+                           file_col=file_col, station_col=station_col,
+                           unique_name=unique_name, copy=copy)
+}
@@ -105,3 +136,25 @@ export_timelapse <- function(results, image_dir, only_animal=TRUE){
   animl_py <- get("animl_py", envir = parent.env(environment()))
   animl_py$export_timelapse(results, image_dir, only_animal=only_animal)
 }
+
+
+#' Converts the .csv file to the MD-formatted .json file.
+#'
+#' @param manifest dataframe containing images and associated detections
+#' @param out_file path to save the MD formatted file
+#' @param detector name of the detector model used
+#' @param prompt ask user to overwrite existing file
+#'
+#' @return None
+#' @export
+#'
+#' @examples
+#' \dontrun{export_megadetector(manifest, out_file='results.json', detector='MDv6')}
+export_megadetector <- function(manifest, out_file=NULL,
+                                detector='MegaDetector v5a', prompt=TRUE){
+  animl_py <- get("animl_py", envir = parent.env(environment()))
+  animl_py$export_megadetector(manifest, out_file=out_file,
+                               detector=detector, prompt=prompt)
+}
+
+
diff --git a/R/file_management.R b/R/file_management.R
index d53f4a7..24f340c 100644
--- a/R/file_management.R
+++ b/R/file_management.R
@@ -9,8 +9,8 @@
 #
 #' @param image_dir folder to search through and find media files
 #' @param exif returns date and time information from exif data, defaults to true
-#' @param offset add offset in hours for videos when using the File Modified date, defaults to 0
 #' @param out_file directory to save .csv of manifest to
+#' @param offset add offset in hours for videos when using the File Modified date, defaults to 0
 #' @param recursive Should directories be scanned recursively? Default TRUE
 #'
 #' @return files dataframe with or without file dates
@@ -56,12 +56,12 @@ WorkingDirectory <- function(workingdir, pkg.env) {
   dir.create(pkg.env$visdir, recursive = T, showWarnings = F)
 
   # Assign specific file paths
-  pkg.env$filemanifest <- paste0(pkg.env$basedir, "FileManifest.csv")
-  pkg.env$imageframes <- paste0(pkg.env$basedir, "ImageFrames.csv")
-  pkg.env$results <- paste0(pkg.env$basedir, "Results.csv")
-  pkg.env$predictions <- paste0(pkg.env$basedir, "Predictions.csv")
-  pkg.env$detections <- paste0(pkg.env$basedir, "Detections.csv")
-  pkg.env$mdraw <- paste0(pkg.env$basedir, "MD_Raw.json")
+  pkg.env$filemanifest_file <- paste0(pkg.env$basedir, "FileManifest.csv")
+  pkg.env$imageframes_file <- paste0(pkg.env$basedir, "ImageFrames.csv")
+  pkg.env$results_file <- paste0(pkg.env$basedir, "Results.csv")
+  pkg.env$predictions_file <- paste0(pkg.env$basedir, "Predictions.csv")
+  pkg.env$detections_file <- paste0(pkg.env$basedir, "Detections.csv")
+  pkg.env$mdraw_file <- paste0(pkg.env$basedir, "MD_Raw.json")
 }
diff --git a/R/install.R b/R/install.R
index 1a9be7c..8072c17 100644
--- a/R/install.R
+++ b/R/install.R
@@ -1,6 +1,5 @@
 # VARIABLE FOR VERSION
 ANIML_VERSION <- "3.1.0"
-animl_py <- NULL
 
 #' Create a miniconda environment for animl and install animl-py
 #'
@@ -24,7 +23,8 @@ animl_install <- function(py_env = "animl_env",
 
   # 2. Install if not exists
   if (inherits(try_error, "try-error")) {
-    packageStartupMessage(sprintf("%s not found \n", py_env))
+    packageStartupMessage(try_error)
+    #packageStartupMessage(sprintf("%s not found \n", py_env))
     # 2. Create new environment
     packageStartupMessage("\n", sprintf("2. Creating a Python Environment (%s)", py_env))
     animl_path <- tryCatch(expr = create_pyenv(python_version = python_version, py_env = py_env),
@@ -42,8 +42,21 @@
   }
   # conda env exists
   else{
-    # check animl version
-    update_animl_py()
+    # check animl-py installed
+    packageStartupMessage("\n2.
Checking animl-py version...") + if(reticulate::py_module_available("animl")){ + animl_py <- reticulate::import("animl", delay_load = TRUE) + py_version <- animl_py$'__version__' + # check version match + if (!identical(animl_version, py_version)){update_animl_py()} + } + # animl-py not yet installed + else{ + packageStartupMessage("\n3. Installing animl-py...") + package <- sprintf("animl==%s", animl_version) + reticulate::use_condaenv(py_env) + reticulate::py_install(package, pip=TRUE) + } return(TRUE) } } @@ -59,11 +72,13 @@ animl_install <- function(py_env = "animl_env", load_animl_py <- function() { if(reticulate::py_module_available("animl")){ animl_py <- reticulate::import("animl", delay_load = TRUE) + packageStartupMessage("animl-py loaded successfully.") + return(animl_py) + } + else{ + packageStartupMessage('Animl load failed') + packageStartupMessage(reticulate::py_config()) } - else{ stop('animl_env environment must be loaded first via reticulate') } - - packageStartupMessage("animl-py loaded successfully.") - return(animl_py) } @@ -80,22 +95,9 @@ load_animl_py <- function() { update_animl_py <- function(py_env = "animl_env", animl_version = ANIML_VERSION) { # load animl-py, check version - animl_py <- reticulate::import("animl", delay_load = TRUE) - version_error <- try(animl_py$'__version__') - if (inherits(version_error, "try-error")){ - print("animl-py version: ", version_error) - reticulate::py_install(sprintf("animl==%s", animl_version), envname=py_env, pip=TRUE) - } - else{ - r_version <- strsplit(animl_version, ".", fixed = TRUE)[[1]] - py_version <- strsplit(version_error, ".", fixed = TRUE)[[1]] - - #r == py - if (!identical(r_version, py_version)){ - packageStartupMessage("animl-py version mismatch, reinstalling...") - reticulate::py_install(sprintf("animl==%s", animl_version), envname=py_env, pip=TRUE) - } - } + packageStartupMessage("animl-py version mismatch, reinstalling...") + reticulate::use_condaenv(py_env) + reticulate::py_install(sprintf("animl==%s", animl_version), pip=TRUE) } diff --git a/R/split.R b/R/split.R index 9c0bb6f..2c3a6da 100644 --- a/R/split.R +++ b/R/split.R @@ -59,23 +59,26 @@ get_animals <- function(manifest){ #' Splits the manifest into training validation and test datasets for training #' #' @param manifest list of files to split for training -#' @param out_dir location to save split lists to #' @param label_col column name containing class labels #' @param file_col column containing file paths -#' @param percentage fraction of data dedicated to train-val-test -#' @param seed RNG seed, if none will pick one at random +#' @param conf_col column containing prediction confidence +#' @param out_dir location to save split lists to +#' @param val_size fraction of data dedicated to validation +#' @param test_size fraction of data dedicated to testing +#' @param random_state RNG seed for reproducibility #' -#' @return train manifest, validate manifest, test manifest, stats file +#' @return train manifest, validate manifest, test manifest #' @export #' #' @examples #' \dontrun{ #' output <- train_val_test(manifest) #' } -train_val_test <- function(manifest, out_dir=NULL, label_col="class", - file_col='filepath', percentage=c(0.7, 0.2, 0.1), - seed=NULL){ +train_val_test <- function(manifest, label_col="class", file_col='filepath', + conf_col = 'confidence', out_dir=NULL, + val_size= 0.1, test_size = 0.1, random_state=42){ animl_py <- get("animl_py", envir = parent.env(environment())) - animl_py$train_val_test(manifest, out_dir=out_dir, 
label_col=label_col,
-                          file_col=file_col, percentage=percentage, seed=NULL)
+  animl_py$train_val_test(manifest, label_col=label_col, file_col=file_col,
+                          conf_col=conf_col, out_dir=out_dir,
+                          val_size=val_size, test_size=test_size,
+                          random_state=as.integer(random_state))
 }
diff --git a/R/video_processing.R b/R/video_processing.R
index 583fb08..ca6aa48 100644
--- a/R/video_processing.R
+++ b/R/video_processing.R
@@ -3,8 +3,8 @@
 #' @param files dataframe of videos
 #' @param frames number of frames to sample
 #' @param fps frames per second, otherwise determine mathematically
-#' @param out_dir directory to save frames to if not null
 #' @param out_file csv file to which results will be saved
+#' @param out_dir directory to save frames to if not null
 #' @param file_col string value indexing which column contains file paths
 #' @param parallel Toggle for parallel processing, defaults to FALSE
 #' @param num_workers number of processors to use if parallel, defaults to 1
@@ -16,12 +16,12 @@
 #' \dontrun{
 #' frames <- extract_frames(manifest, out_dir = "C:\\Users\\usr\\Videos\\", frames = 5)
 #' }
-extract_frames <- function(files, frames=5, fps = NULL, out_dir = NULL, out_file = NULL,
-                           file_col="filepath", parallel = FALSE, num_workers = 1){
+extract_frames <- function(files, frames=5, fps=NULL, out_file=NULL, out_dir=NULL,
+                           file_col="filepath", parallel=FALSE, num_workers=1){
   if (!is.null(fps)){ fps <- as.integer(fps) }
   if (!is.null(frames)){ frames <- as.integer(frames) }
   animl_py <- get("animl_py", envir = parent.env(environment()))
-  animl_py$extract_frames(files, frames=frames, fps=fps, out_dir, out_file=out_file,
+  animl_py$extract_frames(files, frames=frames, fps=fps, out_file=out_file, out_dir=out_dir,
                           file_col=file_col, parallel=parallel, num_workers=as.integer(num_workers))
 }
diff --git a/R/zzz.R b/R/zzz.R
index 843c83f..299c8e6 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1,7 +1,19 @@
 .onLoad <- function(libname, pkgname) {
-  animl_env_avail <- animl_install()
-  if (animl_env_avail) {
-    animl_py <- load_animl_py()
-    assign("animl_py", animl_py, envir = parent.env(environment()))
-  }
+  animl_env_avail <- animl_install()
+  if (animl_env_avail) {
+    animl_py <- load_animl_py()
+    assign("animl_py", animl_py, envir = parent.env(environment()))
+  }
+  else{
+    .stop_animl_failed()
+  }
 }
+
+.stop_animl_failed <- function(){
+  if (interactive()) {
+    warning('animl_env load failed')
+  } else {
+    # non-interactive / scripts should get a clean error
+    stop('animl_env load failed', call. = FALSE)
+  }
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index d626bf1..d775914 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# animl v3.0.0
+# animl v3.1.1
 
 Animl comprises a variety of machine learning tools for analyzing ecological data. The package includes a set of functions to classify subjects within camera trap field data and can handle both images and videos.
 
@@ -27,11 +27,11 @@
 imagedir <- "examples/TestData"
 WorkingDirectory(imagedir, globalenv())
 
 # Read exif data for all images within base directory
-files <- build_file_manifest(imagedir, out_file=filemanifest, exif=TRUE)
+files <- build_file_manifest(imagedir, out_file=filemanifest_file, exif=TRUE)
 
 # Process videos, extract frames for ID
-allframes <- extract_frames(files, out_dir = vidfdir, out_file=imageframes,
-                            frames=3, parallel=T, num_workers=parallel::detectCores())
+allframes <- extract_frames(files, frames=3, out_file=imageframes_file,
+                            parallel=TRUE, num_workers=parallel::detectCores())
 ```
 
 #### 2. Object Detection
 
 More info on
 ```R
 #Load the Megadetector model
-md_py <- load_detector("/Models/md_v5a.0.0.pt", model_type = 'mdv5', device='cuda:0')
+detector <- load_detector("/Models/md_v5a.0.0.pt", model_type = 'mdv5', device='cuda:0')
 
 # Obtain crop information for each image
-mdraw <- detect(md_py, allframes, resize_width=1280, resize_height=960, batch_size=4, device='cuda:0')
+mdraw <- detect(detector, allframes, resize_width=1280, resize_height=960, batch_size=4, device='cuda:0')
 
 # Add crop information to dataframe
-mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections)
+mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections_file)
 ```
 
 #### 3. Classification
@@ -70,13 +70,14 @@ class_list <- classes$class
 
 # load the model
 model_file <- "/Models/Southwest/v3/southwest_v3.pt"
-southwest <- load_classifier(model_file, len(class_list))
+southwest <- load_classifier(model_file, nrow(classes))
 
 # obtain species predictions likelihoods
-pred_raw <- classify(southwest, animals, resize_width=480, resize_height=480, out_file=predictions, batch_size=16, num_workers=8)
+pred_raw <- classify(southwest, animals, resize_width=480, resize_height=480,
+                     out_file=predictions_file, batch_size=16, num_workers=8)
 
 # apply class_list labels and combine with empty set
-manifest <- single_classification(animals, empty, pred_raw, class_list)
+manifest <- single_classification(animals, empty, pred_raw, classes$class)
 ```
 
 If your data includes videos or sequences, we recommend using the sequence_classification algorithm.
@@ -84,9 +85,36 @@ This requires the raw output of the prediction algorithm.
 
 ```R
 # Sequence Classification
-manifest <- sequence_classification(animals, empty=empty, pred_raw, classes=class_list, station_col="station", empty_class="empty")
+manifest <- sequence_classification(animals, empty=empty, pred_raw, classes=class_list,
+                                    station_col="station", empty_class="empty")
 ```
 
+#### 4. Export
+
+You can export the data into folders sorted by prediction:
+```R
+manifest <- export_folders(manifest, out_dir=linkdir, out_file=results_file)
+```
+or into folders sorted by prediction and by station for export to camtrapR:
+```R
+manifest <- export_camtrapR(manifest, out_dir=linkdir, out_file=results_file,
+                            label_col='prediction', file_col="filepath", station_col='station')
+```
+
+You can also export a .json file formatted for COCO:
+```R
+manifest <- export_coco(manifest, class_list=class_list, out_file='results.json')
+```
+Or a .csv file for Timelapse:
+```R
+results <- export_timelapse(manifest, imagedir)
+```
+
 # Models
 
 The Conservation Technology Lab has several [models](https://sandiegozoo.app.box.com/s/9f3xuqldvg9ysaix9c9ug8tdcrmc2eqx) available for use.
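For reference, these classifiers can also be loaded through the list-returning `load_classifier()` call used in `examples/Workflow_animl-py.R`. A minimal sketch, assuming the model and class files have been downloaded locally; the file paths are illustrative:

```R
# Minimal sketch based on examples/Workflow_animl-py.R; paths are illustrative.
southwest <- load_classifier("models/sdzwa_southwest_v3.pt",
                             "models/sdzwa_southwest_v3_classes.csv")
sw_model <- southwest[[1]]   # the classifier model itself
classes  <- southwest[[2]]   # dataframe of class names and IDs
```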

@@ -100,44 +128,32 @@ Detectors: ### Requirements * R >= 4.0 * Reticulate -* Python >= 3.9 -* [Animl-Py >= 3.0.0](https://github.com/conservationtechlab/animl-py) +* Python >= 3.12 +* [Animl-Py >= 3.1.1](https://github.com/conservationtechlab/animl-py) We recommend running animl on a computer with a dedicated GPU. ### Python -animl depends on python and will install python package dependencies if they are not available if installed via CRAN.
-However, we recommend setting up a conda environment using the provided config file.
+animl depends on python and will install any missing python package dependencies via miniconda.
-[Instructions to install conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html)
-
-The R version of animl depends on the python version to handle the machine learning:
+The R version of animl also depends on the python version to handle the machine learning:
 [animl-py](https://github.com/conservationtechlab/animl-py)
 
-Next, install animl-py in your preferred python environment (such as conda) using pip:
-```
-pip install animl
-```
-
 Animl-r can be installed through CRAN:
 ```R
 install.packages('animl')
 ```
+Animl will install animl-py and its associated dependencies.
+
 Animl-r can also be installed by downloading this repo, opening the animl.Rproj file in RStudio and selecting Build -> Install Package.
 
 # Release Notes
-## New for 3.0.0
- - compatible with animl-py v3.0.0
- - remove package dependencies
- - on load, also load animl-py
- - change function name "predict_species" to "classify"
- - add "load_detector" function that can handle MDv5, v6, v1000 and other YOLO models
- - change "sort" to "export"
- - add function to install animl-py and create conda env if does not exist
- - add distance calculation functions for re-id
- - fix bug in sequence_classification that mishandled overlap in classifier classes with megadetector classes
- - changed function naming conventions to follow animl-py
+## New for 3.1.1
+ - compatible with animl-py v3.1.1
+ - add export_camtrapR()
+ - handle on-the-fly video frame generation
+ - bug fixes
 - correct examples and documentation to reflect above changes
diff --git a/examples/Workflow_animl-py.R b/examples/Workflow_animl-py.R
index 9a25997..77cd200 100644
--- a/examples/Workflow_animl-py.R
+++ b/examples/Workflow_animl-py.R
@@ -8,18 +8,14 @@
 # Setup
 #-------------------------------------------------------------------------------
 library(animl)
-library(reticulate)
-use_condaenv("animl-gpu")
-animl_py <- load_animl_py()
-
-imagedir <- "\\examples\\Southwest"
+imagedir <- "C:\\Users\\Kyra\\animl\\examples\\Southwest"
 
 # create global variable file and directory names
 WorkingDirectory(imagedir, globalenv())
 
 # Build file manifest for all images and videos within base directory
-files <- build_file_manifest(imagedir, out_file=filemanifest, exif=TRUE)
+files <- build_file_manifest(imagedir, out_file=filemanifest_file, exif=TRUE)
 
 #===============================================================================
 # Add Project-Specific Info
 #===============================================================================
 
 #files$Station <- sapply(files$FilePath, function(x) strsplit(x,"/")[[1]][basedepth])
 
 # Process videos, extract frames for ID
-allframes <- extract_frames(files, out_dir = vidfdir, out_file=imageframes,
-                            frames=3, parallel=T, num_workers=parallel::detectCores())
+allframes <- extract_frames(files, frames=3, out_file=imageframes_file,
+                            parallel=TRUE, num_workers=parallel::detectCores())
 
 
 #===============================================================================
@@ -41,7 +37,7 @@ allframes <- extract_frames(files, out_dir = vidfdir, out_file=imageframes,
 # MD, specify detectObjectBatch with argument 'mdversion'.
 # PyTorch Via Animl-Py
-md_py <- load_detector("~/models/md_v5a.0.0.pt", model_type = 'mdv5')
+md_py <- load_detector("C://Users//Kyra//animl-py//models//md_v5a.0.0.pt", model_type = 'mdv5')
 mdraw <- detect(md_py, allframes, 1280, 1280, batch_size=4)
 mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections)
diff --git a/man/export_camtrapR.Rd b/man/export_camtrapR.Rd
new file mode 100644
index 0000000..e9e84be
--- /dev/null
+++ b/man/export_camtrapR.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/export.R
+\name{export_camtrapR}
+\alias{export_camtrapR}
+\title{Export data into sorted folders organized by station}
+\usage{
+export_camtrapR(
+  manifest,
+  out_dir,
+  out_file = NULL,
+  label_col = "prediction",
+  file_col = "filepath",
+  station_col = "station",
+  unique_name = "uniquename",
+  copy = FALSE
+)
+}
+\arguments{
+\item{manifest}{dataframe containing images and associated predictions}
+
+\item{out_dir}{directory to export sorted images}
+
+\item{out_file}{if provided, save the manifest to this file}
+
+\item{label_col}{column containing species labels}
+
+\item{file_col}{column containing source paths}
+
+\item{station_col}{column containing station names}
+
+\item{unique_name}{column containing unique file name}
+
+\item{copy}{if TRUE, copy files instead of linking them}
+}
+\value{
+manifest with link column
+}
+\description{
+Export data into sorted folders organized by station
+}
+\examples{
+\dontrun{manifest <- export_camtrapR(manifest, out_dir, out_file=NULL, label_col='prediction',
+                                     file_col="filepath", station_col='station',
+                                     unique_name='uniquename', copy=FALSE)}
+}
diff --git a/man/export_coco.Rd b/man/export_coco.Rd
new file mode 100644
index 0000000..be98cbf
--- /dev/null
+++ b/man/export_coco.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/export.R
+\name{export_coco}
+\alias{export_coco}
+\title{Converts the .csv file to a COCO-formatted .json file.}
+\usage{
+export_coco(manifest, class_list, out_file, info = NULL, licenses = NULL)
+}
+\arguments{
+\item{manifest}{dataframe containing images and associated detections}
+
+\item{class_list}{dataframe containing class names and their corresponding IDs}
+
+\item{out_file}{path to save the formatted file}
+
+\item{info}{info section of COCO file, named list}
+
+\item{licenses}{licenses section of COCO file, array}
+}
+\value{
+COCO formatted json
+}
+\description{
+Converts the .csv file to a COCO-formatted .json file.
+}
+\examples{
+\dontrun{export_coco(manifest, class_list, 'results.json')}
+}
diff --git a/man/export_megadetector.Rd b/man/export_megadetector.Rd
index abe2ecf..2940938 100644
--- a/man/export_megadetector.Rd
+++ b/man/export_megadetector.Rd
@@ -6,16 +6,19 @@
 \usage{
 export_megadetector(
   manifest,
-  output_file = NULL,
-  detector = "MegaDetector v5a"
+  out_file = NULL,
+  detector = "MegaDetector v5a",
+  prompt = TRUE
 )
 }
 \arguments{
 \item{manifest}{dataframe containing images and associated detections}
 
-\item{output_file}{path to save the MD formatted file}
+\item{out_file}{path to save the MD formatted file}
 
 \item{detector}{name of the detector model used}
+
+\item{prompt}{ask user to overwrite existing file}
 }
 \value{
 None
diff --git a/man/train_val_test.Rd b/man/train_val_test.Rd
index f8c0566..0db528e 100644
--- a/man/train_val_test.Rd
+++ b/man/train_val_test.Rd
@@ -6,28 +6,34 @@
 \usage{
 train_val_test(
   manifest,
-  out_dir = NULL,
   label_col = "class",
   file_col = "filepath",
-  percentage = c(0.7, 0.2, 0.1),
-  seed = NULL
+  conf_col = "confidence",
+  out_dir = NULL,
+  val_size = 0.1,
+  test_size = 0.1,
+  random_state = 42
 )
 }
 \arguments{
 \item{manifest}{list of files to split for training}
 
-\item{out_dir}{location to save split lists to}
-
 \item{label_col}{column name containing class labels}
 
 \item{file_col}{column containing file paths}
 
-\item{percentage}{fraction of data dedicated to train-val-test}
+\item{conf_col}{column containing prediction confidence}
+
+\item{out_dir}{location to save split lists to}
+
+\item{val_size}{fraction of data dedicated to validation}
+
+\item{test_size}{fraction of data dedicated to testing}
 
-\item{seed}{RNG seed, if none will pick one at random}
+\item{random_state}{RNG seed for reproducibility}
 }
 \value{
-train manifest, validate manifest, test manifest, stats file
+train manifest, validate manifest, test manifest
 }
 \description{
 Splits the manifest into training, validation and test datasets for training

From 676e66a281cc73f15a0909d02564859aab788115 Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Mon, 29 Dec 2025 15:09:13 -0800
Subject: [PATCH 07/12] preemptive update to 3.1.2

---
 DESCRIPTION                  |  2 +-
 NAMESPACE                    |  1 +
 R/file_management.R          | 12 ++++++------
 R/install.R                  |  2 +-
 examples/Workflow_animl-py.R | 28 ++++++++++++++++++----------
 man/export_megadetector.Rd   |  2 +-
 6 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b8edd1f..bba96ad 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: animl
 Title: A Collection of ML Tools for Conservation Research
-Version: 3.1.0
+Version: 3.1.2
 Authors@R: c(person(given="Kyra", family="Swanson",email="tswanson@sdzwa.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1496-3217")), person(given="Mathias",family="Tobler",role = "aut"))
 Description: Functions required to classify subjects within camera trap field data. The package can handle both images and videos. The authors recommend a two-step approach using Microsoft's 'MegaDetector' model and then a second model trained on the classes of interest.
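Commit 07 registers `export_camtrapR` in the NAMESPACE below. A hedged usage sketch against the signature defined in R/export.R; the output directory is illustrative and the manifest is assumed to carry `filepath`, `prediction`, and `station` columns:

```R
# Sketch only: sort predicted images into per-station folders for camtrapR.
manifest <- export_camtrapR(manifest,
                            out_dir     = "camtrapR_export/",  # illustrative path
                            label_col   = "prediction",
                            station_col = "station",
                            copy        = FALSE)  # link files rather than copying them
```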
diff --git a/NAMESPACE b/NAMESPACE
index 792ca69..00cde2d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,6 +13,7 @@ export(delete_pyenv)
 export(detect)
 export(download_model)
 export(euclidean_squared_distance)
+export(export_camtrapR)
 export(export_coco)
 export(export_folders)
 export(export_megadetector)
diff --git a/R/file_management.R b/R/file_management.R
index 05241a6..be8eb9b 100644
--- a/R/file_management.R
+++ b/R/file_management.R
@@ -56,12 +56,12 @@ WorkingDirectory <- function(workingdir, pkg.env) {
   dir.create(pkg.env$visdir, recursive = T, showWarnings = F)
 
   # Assign specific file paths
-  pkg.env$filemanifest_file <- paste0(pkg.env$basedir, "FileManifest.csv")
-  pkg.env$imageframes_file <- paste0(pkg.env$basedir, "ImageFrames.csv")
-  pkg.env$results_file <- paste0(pkg.env$basedir, "Results.csv")
-  pkg.env$predictions_file <- paste0(pkg.env$basedir, "Predictions.csv")
-  pkg.env$detections_file <- paste0(pkg.env$basedir, "Detections.csv")
-  pkg.env$mdraw_file <- paste0(pkg.env$basedir, "MD_Raw.json")
+  pkg.env$filemanifest_file <- paste0(basedir, "FileManifest.csv")
+  pkg.env$imageframes_file <- paste0(basedir, "ImageFrames.csv")
+  pkg.env$results_file <- paste0(basedir, "Results.csv")
+  pkg.env$predictions_file <- paste0(basedir, "Predictions.csv")
+  pkg.env$detections_file <- paste0(basedir, "Detections.csv")
+  pkg.env$mdraw_file <- paste0(basedir, "MD_Raw.json")
 }
diff --git a/R/install.R b/R/install.R
index 8072c17..2660aed 100644
--- a/R/install.R
+++ b/R/install.R
@@ -1,5 +1,5 @@
 # VARIABLE FOR VERSION
-ANIML_VERSION <- "3.1.0"
+ANIML_VERSION <- "3.1.2"
 
 #' Create a miniconda environment for animl and install animl-py
 #'
diff --git a/examples/Workflow_animl-py.R b/examples/Workflow_animl-py.R
index 77cd200..72e37ae 100644
--- a/examples/Workflow_animl-py.R
+++ b/examples/Workflow_animl-py.R
@@ -10,7 +10,7 @@
 library(animl)
 
-imagedir <- "C:\\Users\\Kyra\\animl\\examples\\Southwest"
+imagedir <- "examples/Southwest"
 
 # create global variable file and directory names
 WorkingDirectory(imagedir, globalenv())
 
@@ -37,30 +38,31 @@ allframes <- extract_frames(files, frames=3, out_file=imageframes_file,
 
 # MD, specify detectObjectBatch with argument 'mdversion'.
 # PyTorch Via Animl-Py
-md_py <- load_detector("C://Users//Kyra//animl-py//models//md_v5a.0.0.pt", model_type = 'mdv5')
+md_py <- load_detector("/home/kyra/models/md_v5a.0.0.pt", model_type = 'mdv5')
 mdraw <- detect(md_py, allframes, 1280, 1280, batch_size=4)
-mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections)
+mdresults <- parse_detections(mdraw, manifest = allframes, out_file = detections_file)
 
-#mdresults <- read.csv(detections)
+mdresults <- read.csv(detections_file)
 #mdresults$Station <- sapply(mdresults$FilePath, function(x) strsplit(x,"/")[[1]][5])
 
 #select animal crops for classification
 animals <- get_animals(mdresults)
 empty <- get_empty(mdresults)
+
 #===============================================================================
 # Species Classifier
 #===============================================================================
-classes <- load_class_list('~/models/sdzwa_southwest_v3_classes.csv')
-class_list <- classes$class
-southwest <- load_classifier('~/models/sdzwa_southwest_v3.pt', length(class_list))
+southwest <- load_classifier('/home/kyra/models/sdzwa_southwest_v3.pt', '/home/kyra/models/sdzwa_southwest_v3_classes.csv')
+sw_model <- southwest[[1]]
+classes <- southwest[[2]]
 
 # get likelihoods
-pred_raw <- classify(southwest, animals, resize_width=299, resize_height=299, out_file=predictions, batch_size=4)
+pred_raw <- classify(sw_model, animals, resize_width=299, resize_height=299, out_file=predictions_file, batch_size=4)
 
 # Single Classification
-manifest <- single_classification(animals, empty, pred_raw, class_list)
+manifest <- single_classification(animals, empty, pred_raw, classes$class)
 
 animals$station <- 'test'
 empty$station <- 'test'
@@ -79,6 +81,10 @@
 write.csv(alldata, results)
 
 #===============================================================================
 # REID
 #===============================================================================
-miew = load_miew("~/models/miewid_v3.bin")
-embeddings = extract_embeddings(miew, manifest)
+#miew <- load_classifier("/home/kyra/models/miewid_v3.onnx", NULL)[[1]]
+miew <- load_miew("/home/kyra/models/miewid_v3.bin")
+
+emb <- classify(miew, animals, 440, 440)
+
+embeddings <- extract_miew_embeddings(miew, manifest)
diff --git a/man/export_megadetector.Rd b/man/export_megadetector.Rd
index 176a9e9..2940938 100644
--- a/man/export_megadetector.Rd
+++ b/man/export_megadetector.Rd
@@ -18,7 +18,7 @@ export_megadetector(
 \item{detector}{name of the detector model used}
 
-\item{prompt}{whether to prompt before overwriting existing file}
+\item{prompt}{ask user to overwrite existing file}
 }
 \value{
 None

From be8e73a7e996665415c3fad7465d6d6dc1648425 Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Wed, 7 Jan 2026 16:48:50 -0800
Subject: [PATCH 08/12] remove datetime correction (fixed in animl-py)

---
 .gitignore          | 1 +
 R/file_management.R | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 8efe7ea..45b3942 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.RData
 .Rhistory
 .Rproj.user
 inst/doc
diff --git a/R/file_management.R b/R/file_management.R
index be8eb9b..c54cb6c 100644
--- a/R/file_management.R
+++ b/R/file_management.R
@@ -24,7 +24,6 @@ build_file_manifest <- function(image_dir, exif=TRUE, out_file=NULL, offset=0,
                                 recursive=TRUE) {
   animl_py <- get("animl_py", envir = parent.env(environment()))
   manifest <-
animl_py$build_file_manifest(image_dir, exif=exif, out_file=out_file, offset=offset, recursive=recursive)
-  manifest$datetime<-as.POSIXct(sapply(manifest$datetime, function(x) as.POSIXct(x))) # hotfix for type error
   return(manifest)
 }

From 18d446a6f455aac1e9d790e3ab24e4874e736f8c Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Thu, 8 Jan 2026 12:35:14 -0800
Subject: [PATCH 09/12] fix path

---
 R/file_management.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/file_management.R b/R/file_management.R
index c54cb6c..15e2fe8 100644
--- a/R/file_management.R
+++ b/R/file_management.R
@@ -38,7 +38,7 @@ build_file_manifest <- function(image_dir, exif=TRUE, out_file=NULL,
 #'
 #' @examples
 #' \dontrun{
-#' WorkingDirectory(/home/kyra/animl/examples)
+#' WorkingDirectory("/home/kyra/animl/examples")
 #' }
 WorkingDirectory <- function(workingdir, pkg.env) {

From 5d30365ad79ee555fa87f2d90994f05748f0fc33 Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Wed, 14 Jan 2026 15:34:22 -0800
Subject: [PATCH 10/12] Downgrade version from 3.1.2 to 3.1.1

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bba96ad..6918ff7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: animl
 Title: A Collection of ML Tools for Conservation Research
-Version: 3.1.2
+Version: 3.1.1
 Authors@R: c(person(given="Kyra", family="Swanson",email="tswanson@sdzwa.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1496-3217")), person(given="Mathias",family="Tobler",role = "aut"))
 Description: Functions required to classify subjects within camera trap field data. The package can handle both images and videos. The authors recommend a two-step approach using Microsoft's 'MegaDetector' model and then a second model trained on the classes of interest.

From 0a72a7fc32ac120500721e135fd366506b2f028d Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Wed, 14 Jan 2026 15:34:58 -0800
Subject: [PATCH 11/12] Update ANIML_VERSION from 3.1.2 to 3.1.1

---
 R/install.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/install.R b/R/install.R
index 2660aed..e5916d8 100644
--- a/R/install.R
+++ b/R/install.R
@@ -1,5 +1,5 @@
 # VARIABLE FOR VERSION
-ANIML_VERSION <- "3.1.2"
+ANIML_VERSION <- "3.1.1"
 
 #' Create a miniconda environment for animl and install animl-py
 #'

From 05f7a17fcacd435605bd5c4645cbfdc89fe80b6b Mon Sep 17 00:00:00 2001
From: Kyra Swanson
Date: Fri, 16 Jan 2026 09:53:12 -0800
Subject: [PATCH 12/12] hardcode class type conversion in load_classifier function for int

---
 R/classification.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/classification.R b/R/classification.R
index 328f09e..2e0f06b 100644
--- a/R/classification.R
+++ b/R/classification.R
@@ -14,6 +14,8 @@
 #' andes <- load_classifier('andes_v1.pt', nrow(classes))}
 load_classifier <- function(model_path, classes, device=NULL, architecture="CTL"){
   animl_py <- get("animl_py", envir = parent.env(environment()))
+
+  if (is.numeric(classes)) { classes <- as.integer(classes) }
   animl_py$load_classifier(model_path, classes, device=device, architecture=architecture)
 }
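With the coercion added in [PATCH 12/12], a plain R double now reaches animl-py as a Python int, so both of the following calls behave identically. A small sketch; the model path and class count are illustrative:

```R
# 52 (an R double) is coerced to 52L inside load_classifier before being
# handed to animl-py, which expects a Python int for a class count.
southwest <- load_classifier("sdzwa_southwest_v3.pt", 52)
southwest <- load_classifier("sdzwa_southwest_v3.pt", 52L)
```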