jasp-stats · vandenman · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026
diff --git a/R/dataset.R b/R/dataset.R
@@ -505,6 +505,13 @@ devcat <- function(..., file = "", sep = " ", fill = FALSE, labels = NULL,
 #' @param options A named list of analysis options, typically from \code{analysisOptions()}.
 #' @param dataset A data.frame or the name/path of a dataset to be encoded.
 #'
+#' @param forceEncode Optional character vector of option names that should be
+#'   forcibly encoded using regular expression replacement. This is useful for
+#'   options like \code{model} that contain variable names embedded in strings
+#'   (e.g., formula syntax "A~B") but do not have a parallel \code{.types} entry.
+#'   These options will have all column names replaced with their encoded equivalents
+#'   using word-boundary-aware regex matching.
+#'
 #' @return A list with three components:
 #' \itemize{
 #'   \item \code{options}: The encoded options with variable names replaced by "jaspColumnN".
@@ -544,7 +551,7 @@ devcat <- function(..., file = "", sep = " ", fill = FALSE, labels = NULL,
 #' }
 #'
 #' @export
-encodeOptionsAndDataset <- function(options, dataset) {
+encodeOptionsAndDataset <- function(options, dataset, forceEncode = NULL) {
 
   # Handle NULL dataset (analysis doesn't require data)
   if (is.null(dataset)) {
@@ -579,7 +586,7 @@ encodeOptionsAndDataset <- function(options, dataset) {
   names(encodingMap)[1] <- "original"
 
   # Step 4: Encode the options
-  encodedOptions <- encodeOptionsWithMap(options, encodingMap, allColumnNames)
+  encodedOptions <- encodeOptionsWithMap(options, encodingMap, allColumnNames, forceEncode)
 
   # Step 5: Create the encoded dataset
   encodedDataset <- createEncodedDataset(dataset, encodingMap)
@@ -642,6 +649,12 @@ extractPairsFromValueAndType <- function(values, types, allColumnNames) {
 
   result <- data.frame(variable = character(0), type = character(0), stringsAsFactors = FALSE)
 
+  # Special case: values is a list with a "value" element (from flattened types/value structure that preserved
+  # additional fields like "model" and "modelOriginal"). In this case, the actual variable names are in values$value
+  if (is.list(values) && "value" %in% names(values) && is.character(types)) {
+    return(extractPairsFromValueAndType(values$value, types, allColumnNames))
+  }
+
   # Simple case: both are character vectors of same length
 
   if (is.character(values) && is.character(types) && length(values) == length(types)) {
@@ -698,14 +711,35 @@ extractPairsFromValueAndType <- function(values, types, allColumnNames) {
 #' @param options The options list.
 #' @param encodingMap Data.frame with columns \code{original}, \code{encoded}, \code{type}.
 #' @param allColumnNames Vector of valid column names.
+#' @param forceEncode Optional character vector of option names to force-encode via regex.
 #'
 #' @return The options list with encoded variable names.
 #' @keywords internal
-encodeOptionsWithMap <- function(options, encodingMap, allColumnNames) {
+encodeOptionsWithMap <- function(options, encodingMap, allColumnNames, forceEncode = NULL) {
 
   # Create lookup from original to encoded
   lookup <- stats::setNames(encodingMap$encoded, encodingMap$original)
 
+  # Force-encode a string value using regex replacement
+
+  # Uses word boundaries to avoid partial matches
+  forceEncodeString <- function(x, lookup) {
+    if (!is.character(x) || length(x) == 0) {
+      return(x)
+    }
+    result <- x
+    for (i in seq_along(result)) {
+      for (origName in names(lookup)) {
+        # Use word boundary regex to replace column names
+        # Escape regex metacharacters in the original name
+        escapedName <- gsub("([.\\\\^$|?*+()\\[\\]\\{\\}-])", "\\\\\\\1", origName)
+        pattern <- paste0("(?<![A-Za-z0-9_])", escapedName, "(?![A-Za-z0-9_])")
+        result[i] <- gsub(pattern, lookup[[origName]], result[i], perl = TRUE)
+      }
+    }
+    return(result)
+  }
+
   # Recursively encode values
   encodeValue <- function(x) {
     if (is.character(x)) {
@@ -716,6 +750,16 @@ encodeOptionsWithMap <- function(options, encodingMap, allColumnNames) {
       }
       return(x)
     } else if (is.list(x)) {
+      # Special handling for lists with both "model" and "modelOriginal" fields.
+      # JASP stores pre-encoded column names in "model" (e.g., "JaspColumn_0_Encoded"),
+      # but our encoding uses different names (e.g., "jaspColumn1"). Since JASP's
+      # encoding scheme doesn't match ours, we must re-encode from "modelOriginal"
+      # (which contains the original user-facing variable names) using our lookup.
+      if ("model" %in% names(x) && "modelOriginal" %in% names(x)) {
+        # Re-encode model from modelOriginal using regex-based replacement
+        x[["model"]] <- forceEncodeString(x[["modelOriginal"]], lookup)
+      }
+
       # Recursively process list elements
       for (i in seq_along(x)) {
         x[[i]] <- encodeValue(x[[i]])
@@ -726,13 +770,42 @@ encodeOptionsWithMap <- function(options, encodingMap, allColumnNames) {
     }
   }
 
+  # Force-encode a string value using regex replacement
+  # Uses word boundaries to avoid partial matches
+  forceEncodeValue <- function(x, lookup) {
+    if (!is.character(x) || length(x) == 0) {
+      return(x)
+    }
+    result <- x
+    for (i in seq_along(result)) {
+      for (origName in names(lookup)) {
+        # Use word boundary regex to replace column names
+        # (?<![A-Za-z0-9_]) is a negative lookbehind for word characters
+        # (?![A-Za-z0-9_]) is a negative lookahead for word characters
+        # This prevents matching partial words
+        # Escape regex metacharacters in the original name
+        escapedName <- gsub("([.\\^$|?*+()\\[\\]\\{\\}-])", "\\\\\\1", origName)
+        pattern <- paste0("(?<![A-Za-z0-9_])", escapedName, "(?![A-Za-z0-9_])")
+        result[i] <- gsub(pattern, lookup[[origName]], result[i], perl = TRUE)
+      }
+    }
+    return(result)
+  }
+
   # Process all options except .meta and .types entries
   optionNames <- names(options)
   for (nm in optionNames) {
     if (nm == ".meta" || grepl("\\.types$", nm)) {
       next
     }
-    options[[nm]] <- encodeValue(options[[nm]])
+
+    # Check if this option should be force-encoded
+    if (!is.null(forceEncode) && nm %in% forceEncode) {
+      # Force encode using regex - only works on character values
+      options[[nm]] <- forceEncodeValue(options[[nm]], lookup)
+    } else {
+      options[[nm]] <- encodeValue(options[[nm]])
+    }
   }
 
   return(options)

diff --git a/R/options.R b/R/options.R
@@ -234,6 +234,10 @@ fixOptionsForVariableTypes <- function(options) {
     return(types)
   }
 
+  # Fields that are internal JASP metadata and should not be preserved as user-facing options
+  # when flattening types/value structures
+  internalFields <- c("types", "value", "optionKey", "columns", "prefixedColumns")
+
   # Recursively flatten a nested list structure, extracting types into a parallel structure
   # Returns a list with $value (the flattened value) and $types (the parallel types structure)
   flattenRecursive <- function(obj) {
@@ -250,6 +254,25 @@ fixOptionsForVariableTypes <- function(options) {
       # Build types structure
       typesStructure <- buildTypesStructure(types, value, optionKey)
 
+      # Check for additional fields that should be preserved (e.g., "model", "modelOriginal")
+      # These are user-facing fields that exist alongside types/value structure
+      additionalFields <- setdiff(names(obj), internalFields)
+
+      # If there are additional fields, preserve them alongside the flattened value
+      if (length(additionalFields) > 0) {
+        # Create a new object with the flattened value and preserved additional fields
+        newObj <- list()
+        newObj[["value"]] <- value
+
+        for (field in additionalFields) {
+          # Recursively process each additional field in case it also needs flattening
+          result <- flattenRecursive(obj[[field]])
+          newObj[[field]] <- result$value
+        }
+
+        return(list(value = newObj, types = typesStructure))
+      }
+
       # Recursively process the value in case it contains nested structures
       if (is.list(value)) {
         result <- flattenRecursive(value)

diff --git a/R/test-generator.R b/R/test-generator.R
@@ -12,6 +12,11 @@
 #'   with hyphens. If FALSE (default), preserves original spacing and characters in filenames.
 #' @param overwrite Logical. If TRUE, overwrites existing test files. If FALSE (default),
 #'   skips files that already exist.
+#' @param forceEncode Optional character vector of option names that should be forcibly
+#'   encoded using regular expression replacement. This is useful for options like
+#'   \code{model} that contain variable names embedded in strings (e.g., formula syntax
+#'   "A~B") but do not have a parallel \code{.types} entry. These options will have all
+#'   column names replaced with their encoded equivalents using word-boundary-aware regex.
 #'
 #' @details
 #' This function processes JASP example files and generates corresponding test files in
@@ -50,10 +55,14 @@
 #'
 #' # Overwrite existing test files
 #' makeTestsFromExamples(overwrite = TRUE)
+#'
+#' # Force encode 'model' option for analyses with embedded variable names
+#' makeTestsFromExamples(forceEncode = "model")
 #' }
 #'
 #' @export makeTestsFromExamples
-makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite = FALSE) {
+makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite = FALSE,
+                                  forceEncode = NULL) {
   # Determine module directory
 
   if (missing(module.dir)) {
@@ -123,7 +132,8 @@ makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite
           module.dir = module.dir,
           sanitize = sanitize, overwrite = overwrite,
           copyToExamples = copyToExamples,
-          pkgAnalyses = pkgAnalyses
+          pkgAnalyses = pkgAnalyses,
+          forceEncode = forceEncode
         )
         if (!is.null(result)) {
           if (!is.null(attr(result, "copiedTo"))) {
@@ -176,14 +186,15 @@ makeTestsFromExamples <- function(path, module.dir, sanitize = FALSE, overwrite
 #'
 #' @param pkgAnalyses Optional character vector of allowed analysis names for this module.
 #'   If provided, analyses not in this list will be skipped.
+#' @param forceEncode Optional character vector of option names to force-encode via regex.
 #'
 #' @return The path to the created test file (with attr "skipped" if skipped,
 #'   and attr "copiedTo" if copied), or NULL if no tests were generated
 #'   (e.g., all analyses were skipped or processing failed).
 #' @keywords internal
 makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
                                         overwrite = FALSE, copyToExamples = FALSE,
-                                        pkgAnalyses = NULL) {
+                                        pkgAnalyses = NULL, forceEncode = NULL) {
   # Extract options from the JASP file
   allOptions <- analysisOptions(jaspFile)
 
@@ -265,7 +276,7 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
     message("  Running analysis ", i, "/", length(allOptions), ": ", analysisName)
 
     # Encode options and dataset
-    encoded <- encodeOptionsAndDataset(opts, dataset)
+    encoded <- encodeOptionsAndDataset(opts, dataset, forceEncode = forceEncode)
 
     # Run the analysis to get results
     tryCatch(
@@ -281,7 +292,8 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
           analysisIndex = i,
           totalAnalyses = length(allOptions),
           jaspFileName = basename(jaspFile),
-          results = results
+          results = results,
+          forceEncode = forceEncode
         )
 
         testBlocks <- c(testBlocks, list(testBlock))
@@ -293,7 +305,8 @@ makeTestsFromSingleJASPFile <- function(jaspFile, module.dir, sanitize = FALSE,
           analysisName = analysisName,
           analysisIndex = i,
           totalAnalyses = length(allOptions),
-          jaspFileName = basename(jaspFile)
+          jaspFileName = basename(jaspFile),
+          forceEncode = forceEncode
         )
         testBlocks <<- c(testBlocks, list(testBlock))
       }
@@ -354,10 +367,12 @@ generateExampleTestFileContent <- function(baseName, sanitizedName, testBlocks)
 #' @param totalAnalyses Total number of analyses in the file.
 #' @param jaspFileName Name of the JASP file.
 #' @param results The analysis results.
+#' @param forceEncode Optional character vector of option names to force-encode via regex.
 #'
 #' @return Character string with the test_that block.
 #' @keywords internal
-generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName, results) {
+generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName, results,
+                                     forceEncode = NULL) {
   # Extract tests from results
   tests <- tryCatch(
     {
@@ -397,9 +412,14 @@ generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses,
   lines <- c(lines, "  dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile)")
   lines <- c(lines, "")
 
-  # Encode and run
+  # Encode and run - include forceEncode if provided
   lines <- c(lines, "  # Encode and run analysis")
-  lines <- c(lines, "  encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
+  if (!is.null(forceEncode) && length(forceEncode) > 0) {
+    forceEncodeStr <- paste0('c("', paste(forceEncode, collapse = '", "'), '")')
+    lines <- c(lines, paste0("  encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = ", forceEncodeStr, ")"))
+  } else {
+    lines <- c(lines, "  encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
+  }
   lines <- c(lines, "  set.seed(1)")
   lines <- c(lines, paste0('  results <- jaspTools::runAnalysis("', analysisName, '", encoded$dataset, encoded$options, encodedDataset = TRUE)'))
   lines <- c(lines, "")
@@ -446,10 +466,12 @@ generateExampleTestBlock <- function(analysisName, analysisIndex, totalAnalyses,
 #' @param analysisIndex Index of this analysis in the JASP file.
 #' @param totalAnalyses Total number of analyses in the file.
 #' @param jaspFileName Name of the JASP file.
+#' @param forceEncode Optional character vector of option names to force-encode via regex.
 #'
 #' @return Character string with the test_that block.
 #' @keywords internal
-generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName) {
+generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnalyses, jaspFileName,
+                                          forceEncode = NULL) {
   lines <- character(0)
 
   # Test description
@@ -478,9 +500,14 @@ generateExampleTestBlockBasic <- function(analysisName, analysisIndex, totalAnal
   lines <- c(lines, "  dataset <- jaspTools::extractDatasetFromJASPFile(jaspFile)")
   lines <- c(lines, "")
 
-  # Encode and run
+  # Encode and run - include forceEncode if provided
   lines <- c(lines, "  # Encode and run analysis")
-  lines <- c(lines, "  encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
+  if (!is.null(forceEncode) && length(forceEncode) > 0) {
+    forceEncodeStr <- paste0('c("', paste(forceEncode, collapse = '", "'), '")')
+    lines <- c(lines, paste0("  encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset, forceEncode = ", forceEncodeStr, ")"))
+  } else {
+    lines <- c(lines, "  encoded <- jaspTools:::encodeOptionsAndDataset(opts, dataset)")
+  }
   lines <- c(lines, "  set.seed(1)")
   lines <- c(lines, paste0('  results <- jaspTools::runAnalysis("', analysisName, '", encoded$dataset, encoded$options, encodedDataset = TRUE)'))
   lines <- c(lines, "")

diff --git a/man/encodeOptionsAndDataset.Rd b/man/encodeOptionsAndDataset.Rd
diff --git a/man/encodeOptionsWithMap.Rd b/man/encodeOptionsWithMap.Rd
diff --git a/man/generateExampleTestBlock.Rd b/man/generateExampleTestBlock.Rd