diff --git a/R/dataset.R b/R/dataset.R index 336076c..9f34eaf 100644 --- a/R/dataset.R +++ b/R/dataset.R @@ -505,6 +505,13 @@ devcat <- function(..., file = "", sep = " ", fill = FALSE, labels = NULL, #' @param options A named list of analysis options, typically from \code{analysisOptions()}. #' @param dataset A data.frame or the name/path of a dataset to be encoded. #' +#' @param forceEncode Optional character vector of option names that should be +#' forcibly encoded using regular expression replacement. This is useful for +#' options like \code{model} that contain variable names embedded in strings +#' (e.g., formula syntax "A~B") but do not have a parallel \code{.types} entry. +#' These options will have all column names replaced with their encoded equivalents +#' using word-boundary-aware regex matching. +#' #' @return A list with three components: #' \itemize{ #' \item \code{options}: The encoded options with variable names replaced by "jaspColumnN". @@ -544,7 +551,7 @@ devcat <- function(..., file = "", sep = " ", fill = FALSE, labels = NULL, #' } #' #' @export -encodeOptionsAndDataset <- function(options, dataset) { +encodeOptionsAndDataset <- function(options, dataset, forceEncode = NULL) { # Handle NULL dataset (analysis doesn't require data) if (is.null(dataset)) { @@ -579,7 +586,7 @@ encodeOptionsAndDataset <- function(options, dataset) { names(encodingMap)[1] <- "original" # Step 4: Encode the options - encodedOptions <- encodeOptionsWithMap(options, encodingMap, allColumnNames) + encodedOptions <- encodeOptionsWithMap(options, encodingMap, allColumnNames, forceEncode) # Step 5: Create the encoded dataset encodedDataset <- createEncodedDataset(dataset, encodingMap) @@ -642,6 +649,12 @@ extractPairsFromValueAndType <- function(values, types, allColumnNames) { result <- data.frame(variable = character(0), type = character(0), stringsAsFactors = FALSE) + # Special case: values is a list with a "value" element (from flattened types/value structure that preserved + # additional fields like "model" and "modelOriginal"). In this case, the actual variable names are in values$value + if (is.list(values) && "value" %in% names(values) && is.character(types)) { + return(extractPairsFromValueAndType(values$value, types, allColumnNames)) + } + # Simple case: both are character vectors of same length if (is.character(values) && is.character(types) && length(values) == length(types)) { @@ -698,14 +711,35 @@ extractPairsFromValueAndType <- function(values, types, allColumnNames) { #' @param options The options list. #' @param encodingMap Data.frame with columns \code{original}, \code{encoded}, \code{type}. #' @param allColumnNames Vector of valid column names. +#' @param forceEncode Optional character vector of option names to force-encode via regex. #' #' @return The options list with encoded variable names. #' @keywords internal -encodeOptionsWithMap <- function(options, encodingMap, allColumnNames) { +encodeOptionsWithMap <- function(options, encodingMap, allColumnNames, forceEncode = NULL) { # Create lookup from original to encoded lookup <- stats::setNames(encodingMap$encoded, encodingMap$original) + # Force-encode a string value using regex replacement + + # Uses word boundaries to avoid partial matches + forceEncodeString <- function(x, lookup) { + if (!is.character(x) || length(x) == 0) { + return(x) + } + result <- x + for (i in seq_along(result)) { + for (origName in names(lookup)) { + # Use word boundary regex to replace column names + # Escape regex metacharacters in the original name + escapedName <- gsub("([.\\\\^$|?*+()\\[\\]\\{\\}-])", "\\\\\\\1", origName) + pattern <- paste0("(? 0) { + # Create a new object with the flattened value and preserved additional fields + newObj <- list() + newObj[["value"]] <- value + + for (field in additionalFields) { + # Recursively process each additional field in case it also needs flattening + result <- flattenRecursive(obj[[field]]) + newObj[[field]] <- result$value + } + + return(list(value = newObj, types = typesStructure)) + } + # Recursively process the value in case it contains nested structures if (is.list(value)) { result <- flattenRecursive(value) diff --git a/R/test-generator.R b/R/test-generator.R index c5da0d5..96ddd75 100644 --- a/R/test-generator.R +++ b/R/test-generator.R @@ -12,6 +12,11 @@ #' with hyphens. If FALSE (default), preserves original spacing and characters in filenames. #' @param overwrite Logical. If TRUE, overwrites existing test files. If FALSE (default), #' skips files that already exist. +#' @param forceEncode Optional character vector of option names that should be forcibly +#' encoded using regular expression replacement. This is useful for options like +#' \code{model} that contain variable names embedded in strings (e.g., formula syntax +#' "A~B") but do not have a parallel \code{.types} entry. These options will have all +#' column names replaced with their encoded equivalents using word-boundary-aware regex. #' #' @details #' This function processes JASP example files and generates corresponding test files in @@ -50,10 +55,14 @@ #' #' # Overwrite existing test files #' makeTestsFromExamples(overwrite = TRUE) +#' +#' # Force encode 'model' option for analyses with embedded variable names +#' makeTestsFromExamples(forceEncode = "model") #' } #' #' @export makeTestsFromExamples -makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite = FALSE) { +makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite = FALSE, + forceEncode = NULL) { # Determine module directory if (missing(module.dir)) { @@ -123,7 +132,8 @@ makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite module.dir = module.dir, sanitize = sanitize, overwrite = overwrite, copyToExamples = copyToExamples, - pkgAnalyses = pkgAnalyses + pkgAnalyses = pkgAnalyses, + forceEncode = forceEncode ) if (!is.null(result)) { if (!is.null(attr(result, "copiedTo"))) { @@ -176,6 +186,7 @@ makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite #' #' @param pkgAnalyses Optional character vector of allowed analysis names for this module. #' If provided, analyses not in this list will be skipped. +#' @param forceEncode Optional character vector of option names to force-encode via regex. #' #' @return The path to the created test file (with attr "skipped" if skipped, #' and attr "copiedTo" if copied), or NULL if no tests were generated @@ -183,7 +194,7 @@ makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite #' @keywords internal makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE, overwrite = FALSE, copyToExamples = FALSE, - pkgAnalyses = NULL) { + pkgAnalyses = NULL, forceEncode = NULL) { # Extract options from the JASP file allOptions <- analysisOptions(jaspFile) @@ -265,7 +276,7 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE, message(" Running analysis ", i, "/", length(allOptions), ": ", analysisName) # Encode options and dataset - encoded <- encodeOptionsAndDataset(opts, dataset) + encoded <- encodeOptionsAndDataset(opts, dataset, forceEncode = forceEncode) # Run the analysis to get results tryCatch( @@ -281,7 +292,8 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE, analysisIndex = i, totalAnalyses = length(allOptions), jaspFileName = basename(jaspFile), - results = results + results = results, + forceEncode = forceEncode ) testBlocks <- c(testBlocks, list(testBlock)) @@ -293,7 +305,8 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE, analysisName = analysisName, analysisIndex = i, totalAnalyses = length(allOptions), - jaspFileName = basename(jaspFile) + jaspFileName = basename(jaspFile), + forceEncode = forceEncode ) testBlocks <<- c(testBlocks, list(testBlock)) } @@ -354,10 +367,12 @@ generateExampleTestFileContent <- function(baseName, sanitizedName, testBlocks) #' @param totalAnalyses Total number of analyses in the file. #' @param jaspFileName Name of the JASP file. #' @param results The analysis results. +#' @param forceEncode Optional character vector of option names to force-encode via regex. #' #' @return Character string with the test_that block. #' @keywords internal -generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName, results) { +generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName, results, + forceEncode = NULL) { # Extract tests from results tests <- tryCatch( { @@ -397,9 +412,14 @@ generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, lines <- c(lines, " dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile)") lines <- c(lines, "") - # Encode and run + # Encode and run - include forceEncode if provided lines <- c(lines, " # Encode and run analysis") - lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)") + if (!is.null(forceEncode) && length(forceEncode) > 0) { + forceEncodeStr <- paste0('c("', paste(forceEncode, collapse = '", "'), '")') + lines <- c(lines, paste0(" encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = ", forceEncodeStr, ")")) + } else { + lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)") + } lines <- c(lines, " set.seed(1)") lines <- c(lines, paste0(' results <- jaspTools::runAnalysis("', analysisName, '", encoded$dataset, encoded$options, encodedDataset = TRUE)')) lines <- c(lines, "") @@ -446,10 +466,12 @@ generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, #' @param analysisIndex Index of this analysis in the JASP file. #' @param totalAnalyses Total number of analyses in the file. #' @param jaspFileName Name of the JASP file. +#' @param forceEncode Optional character vector of option names to force-encode via regex. #' #' @return Character string with the test_that block. #' @keywords internal -generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName) { +generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName, + forceEncode = NULL) { lines <- character(0) # Test description @@ -478,9 +500,14 @@ generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnal lines <- c(lines, " dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile)") lines <- c(lines, "") - # Encode and run + # Encode and run - include forceEncode if provided lines <- c(lines, " # Encode and run analysis") - lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)") + if (!is.null(forceEncode) && length(forceEncode) > 0) { + forceEncodeStr <- paste0('c("', paste(forceEncode, collapse = '", "'), '")') + lines <- c(lines, paste0(" encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = ", forceEncodeStr, ")")) + } else { + lines <- c(lines, " encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)") + } lines <- c(lines, " set.seed(1)") lines <- c(lines, paste0(' results <- jaspTools::runAnalysis("', analysisName, '", encoded$dataset, encoded$options, encodedDataset = TRUE)')) lines <- c(lines, "") diff --git a/man/encodeOptionsAndDataset.Rd b/man/encodeOptionsAndDataset.Rd index 578eb77..45be9e1 100644 --- a/man/encodeOptionsAndDataset.Rd +++ b/man/encodeOptionsAndDataset.Rd @@ -4,12 +4,19 @@ \alias{encodeOptionsAndDataset} \title{Encode Options and Dataset for JASP Analysis} \usage{ -encodeOptionsAndDataset(options, dataset) +encodeOptionsAndDataset(options, dataset, forceEncode = NULL) } \arguments{ \item{options}{A named list of analysis options, typically from \code{analysisOptions()}.} \item{dataset}{A data.frame or the name/path of a dataset to be encoded.} + +\item{forceEncode}{Optional character vector of option names that should be +forcibly encoded using regular expression replacement. This is useful for +options like \code{model} that contain variable names embedded in strings +(e.g., formula syntax "A~B") but do not have a parallel \code{.types} entry. +These options will have all column names replaced with their encoded equivalents +using word-boundary-aware regex matching.} } \value{ A list with three components: diff --git a/man/encodeOptionsWithMap.Rd b/man/encodeOptionsWithMap.Rd index 189a295..1c9b953 100644 --- a/man/encodeOptionsWithMap.Rd +++ b/man/encodeOptionsWithMap.Rd @@ -4,7 +4,7 @@ \alias{encodeOptionsWithMap} \title{Encode Options Using Encoding Map} \usage{ -encodeOptionsWithMap(options, encodingMap, allColumnNames) +encodeOptionsWithMap(options, encodingMap, allColumnNames, forceEncode = NULL) } \arguments{ \item{options}{The options list.} @@ -12,6 +12,8 @@ encodeOptionsWithMap(options, encodingMap, allColumnNames) \item{encodingMap}{Data.frame with columns \code{original}, \code{encoded}, \code{type}.} \item{allColumnNames}{Vector of valid column names.} + +\item{forceEncode}{Optional character vector of option names to force-encode via regex.} } \value{ The options list with encoded variable names. diff --git a/man/generateExampleTestBlock.Rd b/man/generateExampleTestBlock.Rd index 3835338..09953d9 100644 --- a/man/generateExampleTestBlock.Rd +++ b/man/generateExampleTestBlock.Rd @@ -9,7 +9,8 @@ generateExampleTestBlock( analysisIndex, totalAnalyses, jaspFileName, - results + results, + forceEncode = NULL ) } \arguments{ @@ -22,6 +23,8 @@ generateExampleTestBlock( \item{jaspFileName}{Name of the JASP file.} \item{results}{The analysis results.} + +\item{forceEncode}{Optional character vector of option names to force-encode via regex.} } \value{ Character string with the test_that block. diff --git a/man/generateExampleTestBlockBasic.Rd b/man/generateExampleTestBlockBasic.Rd index 7ef207d..2cb5947 100644 --- a/man/generateExampleTestBlockBasic.Rd +++ b/man/generateExampleTestBlockBasic.Rd @@ -8,7 +8,8 @@ generateExampleTestBlockBasic( analysisName, analysisIndex, totalAnalyses, - jaspFileName + jaspFileName, + forceEncode = NULL ) } \arguments{ @@ -19,6 +20,8 @@ generateExampleTestBlockBasic( \item{totalAnalyses}{Total number of analyses in the file.} \item{jaspFileName}{Name of the JASP file.} + +\item{forceEncode}{Optional character vector of option names to force-encode via regex.} } \value{ Character string with the test_that block. diff --git a/man/makeTestsFromExamples.Rd b/man/makeTestsFromExamples.Rd index bb753ab..9abe315 100644 --- a/man/makeTestsFromExamples.Rd +++ b/man/makeTestsFromExamples.Rd @@ -4,7 +4,13 @@ \alias{makeTestsFromExamples} \title{Create test files from JASP example files} \usage{ -makeTestsFromExamples(path, module.dir, sanitize = FALSE, overwrite = FALSE) +makeTestsFromExamples( + path, + module.dir, + sanitize = FALSE, + overwrite = FALSE, + forceEncode = NULL +) } \arguments{ \item{path}{Optional string path to a directory containing JASP example files. @@ -20,6 +26,12 @@ with hyphens. If FALSE (default), preserves original spacing and characters in f \item{overwrite}{Logical. If TRUE, overwrites existing test files. If FALSE (default), skips files that already exist.} + +\item{forceEncode}{Optional character vector of option names that should be forcibly +encoded using regular expression replacement. This is useful for options like +\code{model} that contain variable names embedded in strings (e.g., formula syntax +"A~B") but do not have a parallel \code{.types} entry. These options will have all +column names replaced with their encoded equivalents using word-boundary-aware regex.} } \value{ Invisibly returns a character vector of created/processed test file paths. @@ -64,6 +76,9 @@ makeTestsFromExamples(path = "path/to/jasp/files", module.dir = "path/to/module" # Overwrite existing test files makeTestsFromExamples(overwrite = TRUE) + +# Force encode 'model' option for analyses with embedded variable names +makeTestsFromExamples(forceEncode = "model") } } diff --git a/man/makeTestsFromSingleJASPFile.Rd b/man/makeTestsFromSingleJASPFile.Rd index fc99d6c..6de0c5a 100644 --- a/man/makeTestsFromSingleJASPFile.Rd +++ b/man/makeTestsFromSingleJASPFile.Rd @@ -10,7 +10,8 @@ makeTestsFromSingleJASPFile( sanitize = FALSE, overwrite = FALSE, copyToExamples = FALSE, - pkgAnalyses = NULL + pkgAnalyses = NULL, + forceEncode = NULL ) } \arguments{ @@ -26,6 +27,8 @@ makeTestsFromSingleJASPFile( \item{pkgAnalyses}{Optional character vector of allowed analysis names for this module. If provided, analyses not in this list will be skipped.} + +\item{forceEncode}{Optional character vector of option names to force-encode via regex.} } \value{ The path to the created test file (with attr "skipped" if skipped, diff --git a/tests/JASPFiles/bainSem.jasp b/tests/JASPFiles/bainSem.jasp new file mode 100644 index 0000000..aa4b338 Binary files /dev/null and b/tests/JASPFiles/bainSem.jasp differ diff --git a/tests/testthat/test-encodeOptionsAndDataset.R b/tests/testthat/test-encodeOptionsAndDataset.R index 390a47a..7e59cfe 100644 --- a/tests/testthat/test-encodeOptionsAndDataset.R +++ b/tests/testthat/test-encodeOptionsAndDataset.R @@ -281,3 +281,264 @@ test_that("encodeOptionsAndDataset correctly encodes nested option structures", } } }) + +test_that("encodeOptionsAndDataset with forceEncode replaces column names in model string via regex", { + + jaspFile <- file.path(testthat::test_path(), "..", "JASPFiles", "bainSem.jasp") + + skip_if_not(file.exists(jaspFile), "Test JASP file not found") + + # Get options from the second analysis (has model with column names like peabody, age) + # Note: bainSem.jasp has 3 analyses, we use the second one + opts <- jaspTools:::analysisOptionsFromJASPFile(jaspFile)[[2]] + + # Load the dataset + dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile) + + # The second bainSem analysis has a 'model' option with embedded column names like: + # "A~B > A~peabody = A~age = 0\n..." + # where peabody and age are column names in the dataset + + # First, verify the model option exists and contains column names + expect_true("model" %in% names(opts)) + originalModel <- opts$model + + # Check that the model string contains some column names from the dataset + colNamesInModel <- colnames(dataset)[vapply(colnames(dataset), function(cn) { + grepl(cn, originalModel, fixed = TRUE) + }, logical(1))] + expect_true(length(colNamesInModel) > 0, + info = "Model option should contain column names from dataset") + + # Encode with forceEncode = "model" + result <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = "model") + + # Check that the encoding map was created + expect_true(nrow(result$encodingMap) > 0, + info = "Encoding map should have entries") + + # Check that the model option has been force-encoded + encodedModel <- result$options$model + + # The encoded model should not contain original column names that were in the encoding map + originalVars <- result$encodingMap$original + encodedVars <- result$encodingMap$encoded + + # Check that column names in the encoding map have been replaced + # Only check variables that were actually in the original model + for (i in seq_along(originalVars)) { + origVar <- originalVars[i] + encVar <- encodedVars[i] + + # Check if this variable was in the original model + escapedVar <- gsub("([.\\\\^$|?*+()\\[\\]\\{\\}])", "\\\\\\1", origVar) + pattern <- paste0("(?", encodedModel, fixed = TRUE) || grepl("&", encodedModel, fixed = TRUE), + info = "Model should still contain constraint operators") +}) + +test_that("forceEncode only affects specified options", { + + jaspFile <- file.path(testthat::test_path(), "..", "JASPFiles", "bainSem.jasp") + + skip_if_not(file.exists(jaspFile), "Test JASP file not found") + + # Get options from the second analysis + opts <- jaspTools:::analysisOptionsFromJASPFile(jaspFile)[[2]] + dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile) + + # Test that forceEncode only affects the 'model' option, not other options + # The 'syntax' option also references variables but has shouldEncode metadata + + # Encode with forceEncode = "model" + result <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = "model") + + # The 'model' option should be force-encoded (column names replaced with jaspColumnN) + encodedModel <- result$options$model + expect_true(grepl("jaspColumn", encodedModel), + info = "Model should contain encoded column names") + + # Verify that other options that should be encoded via the normal mechanism + # (e.g., 'syntax') are not changed by the forceEncode regex path + no_force <- jaspTools:::encodeOptionsAndDataset(opts, dataset) + expect_equal(result$options$syntax$model, no_force$options$syntax$model, + info = "syntax$model should be encoded by the normal mechanism and not be altered by forceEncode") +}) + +test_that("forceEncode handles multiple options", { + + # Create a simple test case with multiple options to force-encode + opts <- list( + formula1 = "A ~ B + C", + formula2 = "D ~ A + B", + regularOpt = c("A", "B"), + `regularOpt.types` = c("scale", "scale"), + `.meta` = list() + ) + + dataset <- data.frame( + A = 1:5, + B = 6:10, + C = 11:15, + D = 16:20, + stringsAsFactors = FALSE + ) + + # Encode with forceEncode for both formula options + result <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = c("formula1", "formula2")) + + # Check encoding map was created + expect_true(nrow(result$encodingMap) > 0) + + # Both formula options should have A and B replaced + expect_false(grepl("(? "jaspColumn1B" + opts <- list( + model = "AB ~ A + B", # AB is a single term, A and B are separate + variables = c("A", "B"), + `variables.types` = c("scale", "scale"), + `.meta` = list() + ) + + dataset <- data.frame( + A = 1:5, + B = 6:10, + AB = 11:15, # This column should NOT be matched if not in encoding map + stringsAsFactors = FALSE + ) + + # Only A and B are in the options with types, not AB + result <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = "model") + + # The model should have A and B replaced but AB should remain (since AB is not in encoding map) + encodedModel <- result$options$model + + # A and B should be replaced + expect_false(grepl("(?