From 77ea3223fa7766095e1abc3dd44ed1a164d0f9cc Mon Sep 17 00:00:00 2001 From: Finn Roberts Date: Fri, 31 Oct 2025 09:17:56 +0100 Subject: [PATCH 1/3] Resolves #146; new interpolation approach fixes nearest-neighbor interpolation bug and improves speed --- R/align_move.R | 78 ++++++++++++++++++-------------- tests/testthat/test-align_move.R | 18 ++++++++ 2 files changed, 62 insertions(+), 34 deletions(-) diff --git a/R/align_move.R b/R/align_move.R index b4ee79e..0e6cc44 100644 --- a/R/align_move.R +++ b/R/align_move.R @@ -181,41 +181,35 @@ align_move <- function(m, res = "minimum", start_end_time = NULL, fill_na_values m_aligned <- m_aligned[order(m_aligned$timestamp),] m_aligned <- m_aligned[order(mt_track_id(m_aligned)),] - # fill variables - if(isTRUE(fill_na_values)){ - m_aligend_filled <- lapply(split(m_aligned, mt_track_id(m_aligned)), function(m_track){ - for(x in names_attr){ - this_attr <- m_track[[x]] - - m_track[[x]] <- sapply(1:length(this_attr), function(i){ - if(!is.na(this_attr[i])){ - this_attr[i] - } else{ - left <- if(i == 1) NULL else 1:(i-1) - right <- if(i == length(this_attr)) NULL else (i+1):length(this_attr) - - if(!is.null(left)){ - non_na <- left[which(!is.na(this_attr[left]))[1]] - } else non_na <- NULL - if(!is.null(right)){ - non_na <- c(non_na, right[which(!is.na(this_attr[right]))[1]]) - } - - non_na_diff <- abs(sapply(non_na, function(.non_na){ - difftime( - m_track[[mt_time_column(m_track)]][.non_na], - m_track[[mt_time_column(m_track)]][i], - units = "secs" - ) - })) - - this_attr[non_na[which.min(non_na_diff)]] + if (isTRUE(fill_na_values) && length(names_attr) > 0) { + m_aligned_filled <- split(m_aligned, mt_track_id(m_aligned)) + + # All interpolated attributes should have NA for same records for a given + # track. We only need one attribute vector to identify which records + # are NA. Use the first attribute: + attr1 <- names_attr[[1]] + + # Split by track to avoid interpolating values across tracks + for (i in seq(1, length(m_aligned_filled))) { + # Identify closest timestamps for each missing record + idx <- nearest_time_idx( + m_aligned_filled[[i]][[attr1]], + mt_time(m_aligned_filled[[i]]) + ) + + # Select most proximate available records and reassign for each attribute + invisible( + lapply( + names_attr, + function(att) { + x <- m_aligned_filled[[i]][[att]] + m_aligned_filled[[i]][[att]][is.na(x)] <<- x[!is.na(x)][idx] } - }) - } - return(m_track) - }) - m_aligned <- do.call(rbind, m_aligend_filled) + ) + ) + } + + m_aligned <- do.call(rbind, m_aligned_filled) } # for now, we just return the aligned data @@ -223,4 +217,20 @@ align_move <- function(m, res = "minimum", start_end_time = NULL, fill_na_values m_aligned$interpolated <- NULL return(m_aligned) +} + +# Helper to identify index position of the temporally closest filled records +# using an input vector `x` with values to use for interpolation (and `NA` +# values at locations that need interpolation) and an input timestamp vector +# `time` that is used to identify the closest temporal records in `x`. +nearest_time_idx <- function(x, time) { + stopifnot(length(x) == length(time)) + non_na <- !is.na(x) + if (all(non_na)) return(x) + + # numeric representation of time for distance comparisons + time <- as.numeric(time) + time_non_na <- time[non_na] + + sapply(time[!non_na], function(t) which.min(abs(time_non_na - t))) } \ No newline at end of file diff --git a/tests/testthat/test-align_move.R b/tests/testthat/test-align_move.R index 506ceed..ca3291e 100644 --- a/tests/testthat/test-align_move.R +++ b/tests/testthat/test-align_move.R @@ -33,3 +33,21 @@ test_that("align_move (default)", { expect_warning(align_move(m, digit = "max", verbose = F)) }) #} + +test_that("Fill most proximate value when `fill_na_vals = TRUE`", { + m <- move_data + m[["x"]] <- sample(100, size = nrow(m), replace = TRUE) + + a <- align_move(m, res = units::set_units(2, "min"), verbose = FALSE) + + # Check each track separately, as timestamps should not be matched + # for interpolation across tracks. Each interpolated value in `a` should + # match the value in `m` where the min timestamp criterion is met + for (track in unique(a$track)) { + a1 <- move2::filter_track_data(a, .track_id = track) + m1 <- move2::filter_track_data(m, .track_id = track) + + idx <- sapply(a1$timestamp, function(x) which.min(abs(x - m1$timestamp))) + expect_equal(a1$x, m1$x[idx]) + } +}) From a65d95c87aab1f1e5be1f3da46d149647393001b Mon Sep 17 00:00:00 2001 From: Finn Roberts Date: Mon, 3 Nov 2025 13:33:34 +0100 Subject: [PATCH 2/3] Avoid interpolating timestamps that already have recorded vals Speeds up interpolation significantly --- R/align_move.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/align_move.R b/R/align_move.R index 0e6cc44..f9e0dab 100644 --- a/R/align_move.R +++ b/R/align_move.R @@ -152,10 +152,10 @@ align_move <- function(m, res = "minimum", start_end_time = NULL, fill_na_values # assemble sf object m_aligned <- lapply(1:length(m_tracks), function(i) st_sf( - interpolated = c(rep(FALSE, nrow(m_tracks[[i]])), rep(TRUE, length(m_aligned[[i]]))), + interpolated = TRUE, track = names(m_tracks)[i], - timestamp = c(mt_time(m_tracks[[i]]), times_target[[i]]), - geometry = c(st_geometry(m_tracks[[i]]), m_aligned[[i]]) + timestamp = times_target[[i]], + geometry = m_aligned[[i]] )) m_aligned <- do.call(rbind, m_aligned) colnames(m_aligned) <- c("interpolated", mt_track_id_column(m), mt_time_column(m), attr(m, "sf_column")) From 2a22c9a2b629d12e8225c3f1760f1b8249cbf4a9 Mon Sep 17 00:00:00 2001 From: Finn Roberts Date: Thu, 13 Nov 2025 11:57:59 +0100 Subject: [PATCH 3/3] Interpolate each attribute individually Not all attributes will have same missing values --- R/align_move.R | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/R/align_move.R b/R/align_move.R index f9e0dab..2aa50c8 100644 --- a/R/align_move.R +++ b/R/align_move.R @@ -184,29 +184,18 @@ align_move <- function(m, res = "minimum", start_end_time = NULL, fill_na_values if (isTRUE(fill_na_values) && length(names_attr) > 0) { m_aligned_filled <- split(m_aligned, mt_track_id(m_aligned)) - # All interpolated attributes should have NA for same records for a given - # track. We only need one attribute vector to identify which records - # are NA. Use the first attribute: - attr1 <- names_attr[[1]] - # Split by track to avoid interpolating values across tracks for (i in seq(1, length(m_aligned_filled))) { - # Identify closest timestamps for each missing record - idx <- nearest_time_idx( - m_aligned_filled[[i]][[attr1]], - mt_time(m_aligned_filled[[i]]) - ) - - # Select most proximate available records and reassign for each attribute - invisible( - lapply( - names_attr, - function(att) { - x <- m_aligned_filled[[i]][[att]] - m_aligned_filled[[i]][[att]][is.na(x)] <<- x[!is.na(x)][idx] - } + for (att in names_attr) { + # Identify closest timestamps for each missing record + idx <- nearest_time_idx( + m_aligned_filled[[i]][[att]], + mt_time(m_aligned_filled[[i]]) ) - ) + + x <- m_aligned_filled[[i]][[att]] + m_aligned_filled[[i]][[att]][is.na(x)] <- x[!is.na(x)][idx] + } } m_aligned <- do.call(rbind, m_aligned_filled) @@ -226,7 +215,7 @@ align_move <- function(m, res = "minimum", start_end_time = NULL, fill_na_values nearest_time_idx <- function(x, time) { stopifnot(length(x) == length(time)) non_na <- !is.na(x) - if (all(non_na)) return(x) + if (all(non_na) || all(!non_na)) return(x) # numeric representation of time for distance comparisons time <- as.numeric(time)