Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 77 additions & 4 deletions R/dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,13 @@ devcat <- function(..., file = "", sep = " ", fill = FALSE, labels = NULL,
#' @param options A named list of analysis options, typically from \code{analysisOptions()}.
#' @param dataset A data.frame or the name/path of a dataset to be encoded.
#'
#' @param forceEncode Optional character vector of option names that should be
#' forcibly encoded using regular expression replacement. This is useful for
#' options like \code{model} that contain variable names embedded in strings
#' (e.g., formula syntax "A~B") but do not have a parallel \code{.types} entry.
#' These options will have all column names replaced with their encoded equivalents
#' using word-boundary-aware regex matching.
#'
#' @return A list with three components:
#' \itemize{
#' \item \code{options}: The encoded options with variable names replaced by "jaspColumnN".
Expand Down Expand Up @@ -544,7 +551,7 @@ devcat <- function(..., file = "", sep = " ", fill = FALSE, labels = NULL,
#' }
#'
#' @export
encodeOptionsAndDataset <- function(options, dataset) {
encodeOptionsAndDataset <- function(options, dataset, forceEncode = NULL) {

# Handle NULL dataset (analysis doesn't require data)
if (is.null(dataset)) {
Expand Down Expand Up @@ -579,7 +586,7 @@ encodeOptionsAndDataset <- function(options, dataset) {
names(encodingMap)[1] <- "original"

# Step 4: Encode the options
encodedOptions <- encodeOptionsWithMap(options, encodingMap, allColumnNames)
encodedOptions <- encodeOptionsWithMap(options, encodingMap, allColumnNames, forceEncode)

# Step 5: Create the encoded dataset
encodedDataset <- createEncodedDataset(dataset, encodingMap)
Expand Down Expand Up @@ -642,6 +649,12 @@ extractPairsFromValueAndType <- function(values, types, allColumnNames) {

result <- data.frame(variable = character(0), type = character(0), stringsAsFactors = FALSE)

# Special case: values is a list with a "value" element (from flattened types/value structure that preserved
# additional fields like "model" and "modelOriginal"). In this case, the actual variable names are in values$value
if (is.list(values) && "value" %in% names(values) && is.character(types)) {
return(extractPairsFromValueAndType(values$value, types, allColumnNames))
}

# Simple case: both are character vectors of same length

if (is.character(values) && is.character(types) && length(values) == length(types)) {
Expand Down Expand Up @@ -698,14 +711,35 @@ extractPairsFromValueAndType <- function(values, types, allColumnNames) {
#' @param options The options list.
#' @param encodingMap Data.frame with columns \code{original}, \code{encoded}, \code{type}.
#' @param allColumnNames Vector of valid column names.
#' @param forceEncode Optional character vector of option names to force-encode via regex.
#'
#' @return The options list with encoded variable names.
#' @keywords internal
encodeOptionsWithMap <- function(options, encodingMap, allColumnNames) {
encodeOptionsWithMap <- function(options, encodingMap, allColumnNames, forceEncode = NULL) {

# Create lookup from original to encoded
lookup <- stats::setNames(encodingMap$encoded, encodingMap$original)

# Force-encode a string value using regex replacement

# Uses word boundaries to avoid partial matches
forceEncodeString <- function(x, lookup) {
if (!is.character(x) || length(x) == 0) {
return(x)
}
result <- x
for (i in seq_along(result)) {
for (origName in names(lookup)) {
# Use word boundary regex to replace column names
# Escape regex metacharacters in the original name
escapedName <- gsub("([.\\\\^$|?*+()\\[\\]\\{\\}-])", "\\\\\\\1", origName)
pattern <- paste0("(?<![A-Za-z0-9_])", escapedName, "(?![A-Za-z0-9_])")
result[i] <- gsub(pattern, lookup[[origName]], result[i], perl = TRUE)
}
}
return(result)
}

# Recursively encode values
encodeValue <- function(x) {
if (is.character(x)) {
Expand All @@ -716,6 +750,16 @@ encodeOptionsWithMap <- function(options, encodingMap, allColumnNames) {
}
return(x)
} else if (is.list(x)) {
# Special handling for lists with both "model" and "modelOriginal" fields.
# JASP stores pre-encoded column names in "model" (e.g., "JaspColumn_0_Encoded"),
# but our encoding uses different names (e.g., "jaspColumn1"). Since JASP's
# encoding scheme doesn't match ours, we must re-encode from "modelOriginal"
# (which contains the original user-facing variable names) using our lookup.
if ("model" %in% names(x) && "modelOriginal" %in% names(x)) {
# Re-encode model from modelOriginal using regex-based replacement
x[["model"]] <- forceEncodeString(x[["modelOriginal"]], lookup)
}

# Recursively process list elements
for (i in seq_along(x)) {
x[[i]] <- encodeValue(x[[i]])
Expand All @@ -726,13 +770,42 @@ encodeOptionsWithMap <- function(options, encodingMap, allColumnNames) {
}
}

# Force-encode a string value using regex replacement
# Uses word boundaries to avoid partial matches
forceEncodeValue <- function(x, lookup) {
if (!is.character(x) || length(x) == 0) {
return(x)
}
result <- x
for (i in seq_along(result)) {
for (origName in names(lookup)) {
# Use word boundary regex to replace column names
# (?<![A-Za-z0-9_]) is a negative lookbehind for word characters
# (?![A-Za-z0-9_]) is a negative lookahead for word characters
# This prevents matching partial words
# Escape regex metacharacters in the original name
escapedName <- gsub("([.\\^$|?*+()\\[\\]\\{\\}-])", "\\\\\\1", origName)
pattern <- paste0("(?<![A-Za-z0-9_])", escapedName, "(?![A-Za-z0-9_])")
result[i] <- gsub(pattern, lookup[[origName]], result[i], perl = TRUE)
}
}
return(result)
}

# Process all options except .meta and .types entries
optionNames <- names(options)
for (nm in optionNames) {
if (nm == ".meta" || grepl("\\.types$", nm)) {
next
}
options[[nm]] <- encodeValue(options[[nm]])

# Check if this option should be force-encoded
if (!is.null(forceEncode) && nm %in% forceEncode) {
# Force encode using regex - only works on character values
options[[nm]] <- forceEncodeValue(options[[nm]], lookup)
} else {
options[[nm]] <- encodeValue(options[[nm]])
}
}

return(options)
Expand Down
23 changes: 23 additions & 0 deletions R/options.R
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ fixOptionsForVariableTypes <- function(options) {
return(types)
}

# Fields that are internal JASP metadata and should not be preserved as user-facing options
# when flattening types/value structures
internalFields <- c("types", "value", "optionKey", "columns", "prefixedColumns")

# Recursively flatten a nested list structure, extracting types into a parallel structure
# Returns a list with $value (the flattened value) and $types (the parallel types structure)
flattenRecursive <- function(obj) {
Expand All @@ -250,6 +254,25 @@ fixOptionsForVariableTypes <- function(options) {
# Build types structure
typesStructure <- buildTypesStructure(types, value, optionKey)

# Check for additional fields that should be preserved (e.g., "model", "modelOriginal")
# These are user-facing fields that exist alongside types/value structure
additionalFields <- setdiff(names(obj), internalFields)

# If there are additional fields, preserve them alongside the flattened value
if (length(additionalFields) > 0) {
# Create a new object with the flattened value and preserved additional fields
newObj <- list()
newObj[["value"]] <- value

for (field in additionalFields) {
# Recursively process each additional field in case it also needs flattening
result <- flattenRecursive(obj[[field]])
newObj[[field]] <- result$value
}

return(list(value = newObj, types = typesStructure))
}

# Recursively process the value in case it contains nested structures
if (is.list(value)) {
result <- flattenRecursive(value)
Expand Down
51 changes: 39 additions & 12 deletions R/test-generator.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
#' with hyphens. If FALSE (default), preserves original spacing and characters in filenames.
#' @param overwrite Logical. If TRUE, overwrites existing test files. If FALSE (default),
#' skips files that already exist.
#' @param forceEncode Optional character vector of option names that should be forcibly
#' encoded using regular expression replacement. This is useful for options like
#' \code{model} that contain variable names embedded in strings (e.g., formula syntax
#' "A~B") but do not have a parallel \code{.types} entry. These options will have all
#' column names replaced with their encoded equivalents using word-boundary-aware regex.
#'
#' @details
#' This function processes JASP example files and generates corresponding test files in
Expand Down Expand Up @@ -50,10 +55,14 @@
#'
#' # Overwrite existing test files
#' makeTestsFromExamples(overwrite = TRUE)
#'
#' # Force encode 'model' option for analyses with embedded variable names
#' makeTestsFromExamples(forceEncode = "model")
#' }
#'
#' @export makeTestsFromExamples
makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite = FALSE) {
makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite = FALSE,
forceEncode = NULL) {
# Determine module directory

if (missing(module.dir)) {
Expand Down Expand Up @@ -123,7 +132,8 @@ makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite
module.dir = module.dir,
sanitize = sanitize, overwrite = overwrite,
copyToExamples = copyToExamples,
pkgAnalyses = pkgAnalyses
pkgAnalyses = pkgAnalyses,
forceEncode = forceEncode
)
if (!is.null(result)) {
if (!is.null(attr(result, "copiedTo"))) {
Expand Down Expand Up @@ -176,14 +186,15 @@ makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite
#'
#' @param pkgAnalyses Optional character vector of allowed analysis names for this module.
#' If provided, analyses not in this list will be skipped.
#' @param forceEncode Optional character vector of option names to force-encode via regex.
#'
#' @return The path to the created test file (with attr "skipped" if skipped,
#' and attr "copiedTo" if copied), or NULL if no tests were generated
#' (e.g., all analyses were skipped or processing failed).
#' @keywords internal
makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
overwrite = FALSE, copyToExamples = FALSE,
pkgAnalyses = NULL) {
pkgAnalyses = NULL, forceEncode = NULL) {
# Extract options from the JASP file
allOptions <- analysisOptions(jaspFile)

Expand Down Expand Up @@ -265,7 +276,7 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
message(" Running analysis ", i, "/", length(allOptions), ": ", analysisName)

# Encode options and dataset
encoded <- encodeOptionsAndDataset(opts, dataset)
encoded <- encodeOptionsAndDataset(opts, dataset, forceEncode = forceEncode)

# Run the analysis to get results
tryCatch(
Expand All @@ -281,7 +292,8 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
analysisIndex = i,
totalAnalyses = length(allOptions),
jaspFileName = basename(jaspFile),
results = results
results = results,
forceEncode = forceEncode
)

testBlocks <- c(testBlocks, list(testBlock))
Expand All @@ -293,7 +305,8 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
analysisName = analysisName,
analysisIndex = i,
totalAnalyses = length(allOptions),
jaspFileName = basename(jaspFile)
jaspFileName = basename(jaspFile),
forceEncode = forceEncode
)
testBlocks <<- c(testBlocks, list(testBlock))
}
Expand Down Expand Up @@ -354,10 +367,12 @@ generateExampleTestFileContent <- function(baseName, sanitizedName, testBlocks)
#' @param totalAnalyses Total number of analyses in the file.
#' @param jaspFileName Name of the JASP file.
#' @param results The analysis results.
#' @param forceEncode Optional character vector of option names to force-encode via regex.
#'
#' @return Character string with the test_that block.
#' @keywords internal
generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName, results) {
generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName, results,
forceEncode = NULL) {
# Extract tests from results
tests <- tryCatch(
{
Expand Down Expand Up @@ -397,9 +412,14 @@ generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses,
lines <- c(lines, " dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile)")
lines <- c(lines, "")

# Encode and run
# Encode and run - include forceEncode if provided
lines <- c(lines, " # Encode and run analysis")
lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
if (!is.null(forceEncode) && length(forceEncode) > 0) {
forceEncodeStr <- paste0('c("', paste(forceEncode, collapse = '", "'), '")')
lines <- c(lines, paste0(" encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = ", forceEncodeStr, ")"))
} else {
lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
}
lines <- c(lines, " set.seed(1)")
lines <- c(lines, paste0(' results <- jaspTools::runAnalysis("', analysisName, '", encoded$dataset, encoded$options, encodedDataset = TRUE)'))
lines <- c(lines, "")
Expand Down Expand Up @@ -446,10 +466,12 @@ generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses,
#' @param analysisIndex Index of this analysis in the JASP file.
#' @param totalAnalyses Total number of analyses in the file.
#' @param jaspFileName Name of the JASP file.
#' @param forceEncode Optional character vector of option names to force-encode via regex.
#'
#' @return Character string with the test_that block.
#' @keywords internal
generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName) {
generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName,
forceEncode = NULL) {
lines <- character(0)

# Test description
Expand Down Expand Up @@ -478,9 +500,14 @@ generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnal
lines <- c(lines, " dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile)")
lines <- c(lines, "")

# Encode and run
# Encode and run - include forceEncode if provided
lines <- c(lines, " # Encode and run analysis")
lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
if (!is.null(forceEncode) && length(forceEncode) > 0) {
forceEncodeStr <- paste0('c("', paste(forceEncode, collapse = '", "'), '")')
lines <- c(lines, paste0(" encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = ", forceEncodeStr, ")"))
} else {
lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
}
lines <- c(lines, " set.seed(1)")
lines <- c(lines, paste0(' results <- jaspTools::runAnalysis("', analysisName, '", encoded$dataset, encoded$options, encodedDataset = TRUE)'))
lines <- c(lines, "")
Expand Down
9 changes: 8 additions & 1 deletion man/encodeOptionsAndDataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/encodeOptionsWithMap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/generateExampleTestBlock.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading