From 28dcfa95b797213d9fe8d72c60c60a44c3b4f137 Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Tue, 4 Nov 2025 15:03:13 -0500
Subject: [PATCH 01/10] Add medianNormalize() function

- added new medianNormalize() function for median normalization
  of soma_adat objects
- supports different methods for passing a reference: calculating an
  internal from the provided soma_adat with the ability to select
  specific samples, or passing an  external ADAT, tab or comma delimited
  file or  data.frame with Dilution and Reference columns
- includes validation for required hybridization and plate scale
  normalization steps
---
 NAMESPACE                                |   2 +
 NEWS.md                                  |  14 +-
 R/medianNormalize.R                      | 667 +++++++++++++++++++++++
 inst/WORDLIST                            |   2 +
 man/medianNormalize.Rd                   | 141 +++++
 man/soma_adat.Rd                         |  22 +-
 tests/testthat/_snaps/medianNormalize.md |  17 +
 tests/testthat/test-medianNormalize.R    | 363 ++++++++++++
 8 files changed, 1217 insertions(+), 11 deletions(-)
 create mode 100644 R/medianNormalize.R
 create mode 100644 man/medianNormalize.Rd
 create mode 100644 tests/testthat/_snaps/medianNormalize.md
 create mode 100644 tests/testthat/test-medianNormalize.R

diff --git a/NAMESPACE b/NAMESPACE
index d0754cee..ffe655c4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -112,6 +112,7 @@ export(lift_adat)
 export(loadAdatsAsList)
 export(locateSeqId)
 export(matchSeqIds)
+export(medianNormalize)
 export(meltExpressionSet)
 export(merge_clin)
 export(mutate)
@@ -218,5 +219,6 @@ importFrom(utils,capture.output)
 importFrom(utils,head)
 importFrom(utils,read.csv)
 importFrom(utils,read.delim)
+importFrom(utils,read.table)
 importFrom(utils,tail)
 importFrom(utils,write.table)
diff --git a/NEWS.md b/NEWS.md
index 1c689a83..35010b9f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,17 @@
 # SomaDataIO (6.5.0.9000)
 
+### New Functions
+
+* Added `medianNormalize()` function
+  - performs median normalization on `soma_adat` objects that have been 
+    hybridization normalized and plate scaled
+  - includes validation to ensure required normalization steps have been applied
+  - supports multiple reference approaches:
+    - Internal reference built from study samples
+    - Reference extracted from existing `soma_adat` object
+    - External reference as a data.frame
+  - supports custom grouping by multiple clinical variables
+
 ### Function and Object Improvements
 
 * Removed restrictive file validation from `read_annotations()`
@@ -35,7 +47,7 @@
 
 * Updated `dplyr` verb tests to no longer explicitly test
   for ordering of attributes (#165)
-
+  
 # SomaDataIO (6.4.0)
 
 ### New Functions
diff --git a/R/medianNormalize.R b/R/medianNormalize.R
new file mode 100644
index 00000000..ba24e334
--- /dev/null
+++ b/R/medianNormalize.R
@@ -0,0 +1,667 @@
+#' Perform Median Normalization
+#'
+#' @description Performs median normalization on a `soma_adat` object that has
+#' already undergone hybridization normalization and contains plate scale
+#' normalization factors.
+#'
+#' Median normalization is a technique used to adjust data sets to remove
+#' certain assay artifacts and biases prior to analysis, particularly those that
+#' may arise due to differences in overall protein concentration, pipetting
+#' errors, reagent concentration changes, assay timing, and other systematic
+#' variabilities. Median normalization can improve assay precision and reduce
+#' technical variations that can mask true biological signals.
+#'
+#' This function works by calculating or using reference median values for each
+#' dilution group present in the ADAT. Dilution groups are determined from the
+#' analyte metadata and represent different sample dilutions used in the assay
+#' (e.g., 1:1, 1:200, 1:2000). For each sample, the median RFU across all
+#' analytes within a dilution group is calculated and compared to the reference
+#' median for that group. Scale factors are then computed and applied to adjust
+#' each sample's values toward the reference, ensuring consistent signal levels
+#' across samples within each dilution group. This function supports multiple
+#' reference approaches: calculating an internal reference from study samples,
+#' or using a calculated reference from a different ADAT or external reference
+#' file.
+#'
+#' @section Important Considerations:
+#' \itemize{
+#'   \item If there is a known change in total protein concentration due to analysis
+#'     conditions, perform normalization _within_ groups using the `by` parameter
+#'   \item In newer SomaScan assay versions, population references improve
+#'     inter-plate consistency compared to within-plate references
+#' }
+#'
+#' @param adat A `soma_adat` object created using [read_adat()], containing
+#'   RFU values that have been hybridization normalized and plate scaled.
+#' @param reference Optional. Reference for median normalization. Can be:
+#'   \itemize{
+#'     \item `NULL` (default): Use internal reference from study samples
+#'     \item A `soma_adat` object: Extract reference from this ADAT
+#'     \item A file path (character): Read external reference from tab/comma-separated file
+#'     \item A data.frame: Use provided reference data directly
+#'   }
+#'   When providing an external reference file or data.frame it must contain:
+#'   \describe{
+#'     \item{Dilution}{Character column specifying the dilution group names
+#'       (e.g., "0_005", "0_5", "20"). These should match the dilution
+#'       groups present in the ADAT analyte data.}
+#'     \item{Reference}{Numeric column containing the reference median values
+#'       for each dilution group. These values will be used as the target
+#'       medians for normalization.}
+#'     \item{SeqId}{Optional character column. When included, SeqId-specific
+#'       reference values are used for more precise normalization. Values
+#'       must be in "10000-28" format, not "seq.10000.28" format.}
+#'   }
+#' @param ref_field Character. Field used to select reference samples when
+#'   `reference = NULL`. Default is `"SampleType"`.
+#' @param ref_value Character. Value(s) in `ref_field` to use as reference
+#'   when `reference = NULL`. Default is `c("QC", "Sample")`.
+#' @param by Character vector. Grouping variable(s) for grouped median normalization.
+#'   Must be column name(s) in the ADAT. Use grouping when there are known changes
+#'   in total protein load due to analysis conditions (e.g., disease vs. control,
+#'   treatment vs. baseline). Normalization will be performed within each group
+#'   separately. Default is `"SampleType"`.
+#' @param do_field Character. The field used to select samples for normalization
+#'   (others keep original values). Default is `"SampleType"`.
+#' @param do_regexp Character. A regular expression pattern to select samples
+#'   from `do_field` for normalization. Default is `"QC|Sample"`.
+#' @param verbose Logical. Should progress messages be printed? Default is `TRUE`.
+#' @return A `soma_adat` object with median normalization applied and RFU values
+#'   adjusted. The existing `NormScale_*` columns are updated to include the
+#'   effects of both plate scale normalization and median normalization.
+#' @examples
+#' \dontrun{
+#' # Internal reference from study samples (default)
+#' # Use when you have representative QC or control samples in your study
+#' med_norm_adat <- medianNormalize(adat)
+#'
+#' # Reference from specific samples in the ADAT
+#' # Use when you want to normalize against only QC samples
+#' med_norm_adat <- medianNormalize(adat,
+#'                                  ref_field = "SampleType",
+#'                                  ref_value = "QC")
+#'
+#' # Reference from another ADAT
+#' # Use when you have a reference population or control study
+#' ref_adat <- read_adat("reference_file.adat")
+#' med_norm_adat <- medianNormalize(adat, reference = ref_adat)
+#'
+#' # External reference file
+#' # Use when you have pre-calculated reference medians for each dilution
+#' med_norm_adat <- medianNormalize(adat, reference = "reference_file.csv")
+#'
+#' # External reference as data.frame
+#' # Use for programmatic control over reference values
+#' ref_data <- data.frame(
+#'   Dilution = c("0_005", "0_5", "20"),
+#'   Reference = c(1500.2, 3200.8, 4100.5)
+#' )
+#' med_norm_adat <- medianNormalize(adat, reference = ref_data)
+#'
+#' # Custom grouping by multiple variables
+#' # Use when total protein load changes due to analysis conditions
+#' # (normalize within groups to account for expected biological differences)
+#' med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
+#'
+#' # Normalize only specific sample types
+#' # Use when you want to preserve original values for certain sample types
+#' med_norm_adat <- medianNormalize(adat,
+#'                                  do_field = "SampleType",
+#'                                  do_regexp = "Sample")
+#' }
+#' @importFrom dplyr filter
+#' @importFrom stats median
+#' @importFrom utils read.table read.csv
+#' @export
+medianNormalize <- function(adat,
+                            reference = NULL,
+                            ref_field = "SampleType",
+                            ref_value = c("QC", "Sample"),
+                            by = "SampleType",
+                            do_field = "SampleType",
+                            do_regexp = "QC|Sample",
+                            verbose = TRUE) {
+
+  # Input validation ----
+  stopifnot("`adat` must be a class `soma_adat` object" = is.soma_adat(adat))
+
+  # Check that required normalization steps have been applied ----
+  header <- attr(adat, "Header.Meta")$HEADER
+
+  if (is.null(header)) {
+    stop("ADAT header metadata is missing", call. = FALSE)
+  }
+
+  # Check for hybrid normalization
+  if (!"ProcessSteps" %in% names(header) ||
+      !grepl("Hyb Normalization", header$ProcessSteps, ignore.case = TRUE)) {
+    stop(
+      "Hybrid normalization step not detected in ProcessSteps. ",
+      "Please apply hybrid normalization before median normalization.",
+      call. = FALSE
+    )
+  }
+
+  # Check for plate scale factors
+  norm_cols <- grep("^(Norm|norm)[Ss]cale", names(adat), value = TRUE)
+  if (length(norm_cols) == 0) {
+    stop(
+      "No normalization scale factor columns found. ",
+      "Please ensure plate scale normalization has been applied.",
+      call. = FALSE
+    )
+  }
+
+  # Create dilution groups ----
+  apt_data <- getAnalyteInfo(adat)
+
+  if (!"Dilution" %in% names(apt_data)) {
+    stop("Dilution information not found in analyte data", call. = FALSE)
+  }
+
+  # Filter out hybridization controls
+  apt_data <- filter(apt_data, !grepl("^Hybridization", Type, ignore.case = TRUE))
+
+  # Create dilution groups
+  dil_groups <- split(apt_data$AptName, apt_data$Dilution)
+
+  # Clean up dilution names
+  names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
+  names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
+
+  if (length(dil_groups) < 2L) {
+    warning("Fewer than 2 dilution groups detected", call. = FALSE)
+  }
+
+  # Check for existing normalization scale factors ----
+  existing_norm_sf <- grep("^NormScale_", names(adat), value = TRUE)
+
+  if ( verbose ) {
+    if (length(existing_norm_sf) > 0) {
+      .todo("Normalization scale factors already exist: {.val {paste0(existing_norm_sf, collapse = ', ')}} - they will be replaced with new scale factors")
+    }
+  }
+
+
+  # Determine which samples to normalize ----
+  if (!is.null(do_field) && !is.null(do_regexp)) {
+    if (!do_field %in% names(adat)) {
+      stop("Field `", do_field, "` not found in adat columns", call. = FALSE)
+    }
+
+    do_samples <- grep(do_regexp, adat[[do_field]])
+    if (length(do_samples) == 0L) {
+      stop(
+        "No samples selected for normalization with pattern: ", do_regexp,
+        call. = FALSE
+      )
+    }
+    dont_samples <- setdiff(seq_len(nrow(adat)), do_samples)
+  } else {
+    do_samples <- seq_len(nrow(adat))
+    dont_samples <- NULL
+  }
+
+  # Process reference ----
+  if (is.null(reference)) {
+    # Check if ref_field conflicts with grouping variables
+    conflicts_with_grouping <- FALSE
+    if (ref_field %in% by) {
+      samples_to_normalize <- adat[do_samples, ]
+      group_values <- unique(samples_to_normalize[[ref_field]])
+      group_values <- group_values[!is.na(group_values)]
+      all_groups_in_ref <- all(group_values %in% ref_value)
+      conflicts_with_grouping <- !identical(by, ref_field) || !all_groups_in_ref
+    }
+
+    if (conflicts_with_grouping) {
+      # Calculate global reference to avoid groups lacking reference samples
+      if (verbose) {
+        .todo("Building global internal reference from field: {.val {ref_field}} with values: {.val {ref_value}}")
+      }
+      ref_data <- .buildInternalReference(adat, ref_field, ref_value, dil_groups)
+    } else {
+      # Standard internal reference - calculate per group
+      ref_data <- NULL
+      if (verbose) {
+        .todo("Building internal reference from field: {.val {ref_field}} with values: {.val {ref_value}}")
+      }
+    }
+  } else {
+    ref_data <- .processReference(reference, ref_field, ref_value, adat, dil_groups, apt_data, verbose)
+  }
+
+  # Add row identifier to maintain order
+  adat$.rowid <- seq_len(nrow(adat))
+
+  # Perform median normalization on selected samples
+  if (length(do_samples) > 0) {
+    norm_adat <- .performMedianNorm(
+      adat[do_samples, ],
+      dil_groups = dil_groups,
+      by = by,
+      ref_data = ref_data,
+      ref_field = ref_field,
+      ref_value = ref_value,
+      verbose = verbose
+    )
+  }
+
+  # Handle samples that were not normalized
+  if (!is.null(dont_samples) && length(dont_samples) > 0) {
+    unnorm_adat <- adat[dont_samples, ]
+    sf_cols <- paste0("NormScale_", names(dil_groups))
+
+    # Check if scale factor columns already exist in the original data
+    existing_sf_cols <- intersect(sf_cols, names(adat))
+
+    # For all scale factor columns, preserve existing values or set to 1.0
+    for (col in sf_cols) {
+      if (col %in% existing_sf_cols) {
+        # Keep the existing value as-is - unnorm_adat[[col]] already contains it
+      } else {
+        # Set to 1.0 for new scale factor columns
+        unnorm_adat[[col]] <- 1.0
+      }
+    }
+
+    # Ensure column order matches
+    unnorm_adat <- unnorm_adat[, names(norm_adat)]
+    norm_adat <- rbind(norm_adat, unnorm_adat)
+  }
+
+  # Restore original order
+  norm_adat <- norm_adat[order(norm_adat$.rowid), ]
+  norm_adat$.rowid <- NULL
+
+  # Update header metadata
+  .updateHeaderMetadata(norm_adat, reference)
+}
+
+
+#' Process Reference Data
+#' @noRd
+.processReference <- function(reference, ref_field, ref_value, adat, dil_groups, apt_data, verbose) {
+
+  if (is.soma_adat(reference)) {
+    # Use reference from provided ADAT
+    if (verbose) {
+      .todo("Using reference from provided ADAT object")
+    }
+    return(.extractReferenceFromAdat(reference, dil_groups))
+
+  } else if (is.character(reference) && length(reference) == 1) {
+    # External reference file
+    if (verbose) {
+      .todo("Reading external reference from file: {.val {reference}}")
+    }
+    return(.readExternalReference(reference, dil_groups, apt_data))
+
+  } else if (is.data.frame(reference)) {
+    # Reference data provided directly
+    if (verbose) {
+      .todo("Using provided reference data.frame")
+    }
+    return(.validateReferenceData(reference, dil_groups, apt_data))
+
+  } else {
+    stop(
+      "Invalid reference type. Must be NULL, soma_adat, file path, or data.frame",
+      call. = FALSE
+    )
+  }
+}
+
+#' Build Internal Reference from Study Samples
+#' @noRd
+.buildInternalReference <- function(adat, ref_field, ref_value, dil_groups) {
+
+  if (!ref_field %in% names(adat)) {
+    stop("Reference field `", ref_field, "` not found", call. = FALSE)
+  }
+
+  # Select reference samples
+  ref_samples <- adat[[ref_field]] %in% ref_value
+  if (sum(ref_samples) == 0) {
+    stop(
+      "No reference samples found with field `", ref_field,
+      "` and values: ", paste(ref_value, collapse = ", "),
+      call. = FALSE
+    )
+  }
+
+  ref_adat <- adat[ref_samples, ]
+
+  # Calculate reference medians for each dilution group
+  ref_data <- list()
+  for (dil_name in names(dil_groups)) {
+    dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
+    if (length(dil_apts) > 0) {
+      ref_data[[dil_name]] <- apply(ref_adat[, dil_apts, drop = FALSE], 2, median, na.rm = TRUE)
+    }
+  }
+
+  ref_data
+}
+
+#' Extract Reference from ADAT
+#' @noRd
+.extractReferenceFromAdat <- function(ref_adat, dil_groups) {
+
+  # Calculate reference medians for each dilution group
+  ref_data <- list()
+  for (dil_name in names(dil_groups)) {
+    dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(ref_adat))
+    if (length(dil_apts) > 0) {
+      ref_data[[dil_name]] <- apply(ref_adat[, dil_apts, drop = FALSE], 2, median, na.rm = TRUE)
+    }
+  }
+
+  ref_data
+}
+
+#' Read External Reference File
+#' @noRd
+.readExternalReference <- function(file_path, dil_groups, apt_data) {
+
+  if (!file.exists(file_path)) {
+    stop("Reference file not found: ", file_path, call. = FALSE)
+  }
+
+  # Determine file type and read
+  file_ext_val <- file_ext(file_path)
+
+  if (file_ext_val %in% c("csv")) {
+    ref_df <- read.csv(file_path, stringsAsFactors = FALSE)
+  } else if (file_ext_val %in% c("txt", "tsv")) {
+    ref_df <- utils::read.table(file_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+  } else {
+    # Try to auto-detect
+    ref_df <- tryCatch({
+      read.csv(file_path, stringsAsFactors = FALSE)
+    }, error = function(e) {
+      utils::read.table(file_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+    })
+  }
+
+  .validateReferenceData(ref_df, dil_groups, apt_data)
+}
+
+#' Validate Reference Data
+#' @noRd
+.validateReferenceData <- function(ref_df, dil_groups, apt_data = NULL) {
+
+  # Check that required columns are present
+  required_cols <- c("Dilution", "Reference")
+  if (!all(required_cols %in% names(ref_df))) {
+    missing_cols <- setdiff(required_cols, names(ref_df))
+    stop(
+      "Reference data must contain columns: ", paste(missing_cols, collapse = ", "),
+      call. = FALSE
+    )
+  }
+
+  # Check if we have SeqId-specific information
+  if ("SeqId" %in% names(ref_df)) {
+    # Process as SeqId-specific reference
+    return(.processSeqIdReference(ref_df, dil_groups, apt_data))
+  } else {
+    # Process as simple dilution-level reference
+    return(.processSimpleReference(ref_df, dil_groups))
+  }
+}
+
+#' Process SeqId-Specific Reference Data
+#' @noRd
+.processSeqIdReference <- function(ref_df, dil_groups, apt_data) {
+  # Convert dilution values to character for consistent matching
+  ref_df$Dilution <- as.character(ref_df$Dilution)
+
+  if (!is.null(apt_data) && !"SeqId" %in% names(apt_data)) {
+    stop("ADAT analyte data must contain SeqId column for SeqId reference matching", call. = FALSE)
+  }
+
+  # Create a global reference mapping from all dilutions
+  # Expect SeqId column in reference data
+  if (!"SeqId" %in% names(ref_df)) {
+    stop("Reference data must contain SeqId column for SeqId reference matching", call. = FALSE)
+  }
+  global_seqid_refs <- setNames(ref_df$Reference, ref_df$SeqId)
+
+  ref_data <- list()
+
+  # Process each dilution group using the global reference
+  for (dil_name in names(dil_groups)) {
+    dil_apt_names <- dil_groups[[dil_name]]
+
+    # Convert AptNames to SeqIds and find matches in global reference
+    dil_seq_ids <- apt_data$SeqId[apt_data$AptName %in% dil_apt_names]
+    matching_seq_ids <- intersect(dil_seq_ids, names(global_seqid_refs))
+
+    if (length(matching_seq_ids) > 0) {
+      # Convert back to AptNames for the final result
+      matching_apt_names <- apt_data$AptName[apt_data$SeqId %in% matching_seq_ids]
+      ref_values <- global_seqid_refs[matching_seq_ids]
+      names(ref_values) <- matching_apt_names
+      ref_data[[dil_name]] <- ref_values
+    }
+  }
+
+  # Check that we have some references
+  if (length(ref_data) == 0) {
+    stop("No matching SeqIds or dilution groups found in reference data", call. = FALSE)
+  }
+
+  ref_data
+}
+
+#' Process Simple Reference Data (Dilution -> Reference)
+#' @noRd
+.processSimpleReference <- function(ref_df, dil_groups) {
+  # Convert to named list
+  ref_data <- setNames(ref_df$Reference, ref_df$Dilution)
+
+  # Check that we have references for all dilution groups
+  missing_dils <- setdiff(names(dil_groups), names(ref_data))
+  if (length(missing_dils) > 0) {
+    stop(
+      "Missing reference values for dilution groups: ", paste(missing_dils, collapse = ", "),
+      call. = FALSE
+    )
+  }
+
+  ref_data
+}
+
+#' Perform Median Normalization
+#' @noRd
+.performMedianNorm <- function(adat, dil_groups, by, ref_data, ref_field, ref_value, verbose) {
+
+  # Store original rownames to restore later
+  original_rownames <- rownames(adat)
+
+  # Validate grouping variables
+  if (is.character(by) && length(by) > 0) {
+    missing_cols <- setdiff(by, names(adat))
+    if (length(missing_cols) > 0) {
+      stop("Grouping column(s) not found: ", paste(missing_cols, collapse = ", "),
+           call. = FALSE)
+    }
+  }
+
+  # Create grouping variable
+  if (length(by) == 1L) {
+    group_var <- adat[[by]]
+  } else if (length(by) > 1L) {
+    group_var <- apply(adat[, by, drop = FALSE], 1, paste, collapse = "__")
+  } else {
+    group_var <- rep("all", nrow(adat))
+  }
+
+  # Report grouping strategy if verbose
+  if (verbose) {
+    if (length(by) > 1L) {
+      .todo("Performing grouped median normalization by: {.val {paste(by, collapse = ', ')}}")
+    } else if (length(by) == 1L && by != "SampleType") {
+      .todo("Performing grouped median normalization by: {.val {by}}")
+    } else if (length(unique(group_var)) > 1) {
+      .todo("Performing grouped median normalization by: {.val {by}} ({.val {length(unique(group_var))}} groups)")
+    }
+  }
+
+  adat$.group <- group_var
+
+  # Split data by groups and process each group separately
+  groups <- unique(group_var)
+  result_list <- list()
+
+  for (grp in groups) {
+    grp_samples <- which(group_var == grp)
+    grp_adat <- adat[grp_samples, , drop = FALSE]
+
+    if (verbose && length(groups) > 1) {
+      .todo("Processing group: {.val {grp}} ({.val {length(grp_samples)}} samples)")
+    }
+
+    # Calculate scale factors for each dilution group within this sample group
+    for (dil_name in names(dil_groups)) {
+      sf_col <- paste0("NormScale_", dil_name)
+
+      # Initialize scale factor column
+      if (!sf_col %in% names(grp_adat)) {
+        grp_adat[[sf_col]] <- 1.0
+      }
+
+      # Get analytes in this dilution
+      dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(grp_adat))
+
+      if (length(dil_apts) == 0) {
+        next
+      }
+
+      if (verbose) {
+        .done("Processing dilution '{dil_name}' with {length(dil_apts)} analytes")
+      }
+
+      # Calculate reference values for this dilution
+      if (!is.null(ref_data) && dil_name %in% names(ref_data)) {
+        # Use external reference
+        ref_values <- ref_data[[dil_name]]
+
+        if (is.numeric(ref_values) && length(ref_values) == 1) {
+          # Single reference value for the whole dilution group
+          grp_ref_values <- rep(ref_values, length(dil_apts))
+          names(grp_ref_values) <- dil_apts
+        } else if (is.numeric(ref_values) && length(ref_values) > 1) {
+          # Aptamer-specific reference values
+          grp_ref_values <- ref_values[dil_apts]
+          has_ref <- !is.na(grp_ref_values)
+
+          if (any(has_ref)) {
+            dil_apts <- dil_apts[has_ref]
+            grp_ref_values <- grp_ref_values[has_ref]
+          } else {
+            next  # No references available, skip this dilution group
+          }
+        } else {
+          # Fallback to group-specific calculation
+          grp_ref_values <- apply(grp_adat[, dil_apts, drop = FALSE], 2, median, na.rm = TRUE)
+        }
+      } else {
+        # Internal reference: Use samples from this group only
+        if (!ref_field %in% names(grp_adat)) {
+          stop("Reference field `", ref_field, "` not found", call. = FALSE)
+        }
+
+        ref_samples_mask <- grp_adat[[ref_field]] %in% ref_value
+        if (sum(ref_samples_mask) == 0) {
+          stop(
+            "No reference samples found with field `", ref_field,
+            "` and values: ", paste(ref_value, collapse = ", "),
+            call. = FALSE
+          )
+        }
+
+        ref_sample_data <- grp_adat[ref_samples_mask, dil_apts, drop = FALSE]
+        grp_ref_values <- apply(ref_sample_data, 2, median, na.rm = TRUE)
+      }
+
+      # Calculate scale factors for each sample in this group
+      for (i in seq_len(nrow(grp_adat))) {
+        sample_values <- as.numeric(grp_adat[i, dil_apts, drop = FALSE])
+        ratios <- grp_ref_values / sample_values
+        med_scale_factor <- median(ratios[is.finite(ratios)], na.rm = TRUE)
+
+        if (!is.finite(med_scale_factor)) {
+          med_scale_factor <- 1.0
+        }
+
+        grp_adat[[sf_col]][i] <- med_scale_factor
+      }
+
+      # Apply scale factors to analytes
+      for (apt in dil_apts) {
+        grp_adat[[apt]] <- grp_adat[[apt]] * grp_adat[[sf_col]]
+      }
+    }
+
+    result_list[[as.character(grp)]] <- grp_adat
+  }
+
+  # Combine results and restore original order
+  adat <- do.call(rbind, result_list)
+  adat <- adat[order(adat$.rowid), ]
+
+  # Restore original rownames
+  rownames(adat) <- original_rownames
+
+  # Remove temporary grouping column
+  adat$.group <- NULL
+
+  # Round to 1 decimal place (standard for SomaScan data)
+  apts <- getAnalytes(adat)
+  for (apt in apts) {
+    adat[[apt]] <- round(adat[[apt]], 1)
+  }
+
+  adat
+}
+
+#' Update Header Metadata
+#' @noRd
+.updateHeaderMetadata <- function(adat, reference) {
+  header_meta <- attr(adat, "Header.Meta")
+
+  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
+    # Add median normalization to process steps
+    if ("ProcessSteps" %in% names(header_meta$HEADER)) {
+      if (!grepl("medNormInt", header_meta$HEADER$ProcessSteps)) {
+        header_meta$HEADER$ProcessSteps <- paste(
+          header_meta$HEADER$ProcessSteps,
+          "medNormInt (SampleId)",
+          sep = ", "
+        )
+      }
+    } else {
+      header_meta$HEADER$ProcessSteps <- "medNormInt (SampleId)"
+    }
+
+    # Set normalization algorithm
+    header_meta$HEADER$NormalizationAlgorithm <- "MedNorm"
+
+    # Set reference type
+    if (is.null(reference)) {
+      header_meta$HEADER$MedNormReference <- "intraplate"
+    } else if (is.soma_adat(reference)) {
+      header_meta$HEADER$MedNormReference <- "external_adat"
+    } else if (is.character(reference)) {
+      header_meta$HEADER$MedNormReference <- basename(reference)
+    } else {
+      header_meta$HEADER$MedNormReference <- "external_data"
+    }
+
+    attr(adat, "Header.Meta") <- header_meta
+  }
+
+  adat
+}
diff --git a/inst/WORDLIST b/inst/WORDLIST
index efd92edc..4c2a38cc 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -113,6 +113,7 @@ md
 medNormRef
 nd
 normals
+pipetting
 pkgdown
 plex
 pre
@@ -128,4 +129,5 @@ th
 tibble
 tidyr
 untransformed
+variabilities
 vectorized
diff --git a/man/medianNormalize.Rd b/man/medianNormalize.Rd
new file mode 100644
index 00000000..00a22ad8
--- /dev/null
+++ b/man/medianNormalize.Rd
@@ -0,0 +1,141 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/medianNormalize.R
+\name{medianNormalize}
+\alias{medianNormalize}
+\title{Perform Median Normalization}
+\usage{
+medianNormalize(
+  adat,
+  reference = NULL,
+  ref_field = "SampleType",
+  ref_value = c("QC", "Sample"),
+  by = "SampleType",
+  do_field = "SampleType",
+  do_regexp = "QC|Sample",
+  verbose = TRUE
+)
+}
+\arguments{
+\item{adat}{A \code{soma_adat} object created using \code{\link[=read_adat]{read_adat()}}, containing
+RFU values that have been hybridization normalized and plate scaled.}
+
+\item{reference}{Optional. Reference for median normalization. Can be:
+\itemize{
+\item \code{NULL} (default): Use internal reference from study samples
+\item A \code{soma_adat} object: Extract reference from this ADAT
+\item A file path (character): Read external reference from tab/comma-separated file
+\item A data.frame: Use provided reference data directly
+}
+When providing an external reference file or data.frame it must contain:
+\describe{
+\item{Dilution}{Character column specifying the dilution group names
+(e.g., "0_005", "0_5", "20"). These should match the dilution
+groups present in the ADAT analyte data.}
+\item{Reference}{Numeric column containing the reference median values
+for each dilution group. These values will be used as the target
+medians for normalization.}
+\item{SeqId}{Optional character column. When included, SeqId-specific
+reference values are used for more precise normalization. Values
+must be in "10000-28" format, not "seq.10000.28" format.}
+}}
+
+\item{ref_field}{Character. Field used to select reference samples when
+\code{reference = NULL}. Default is \code{"SampleType"}.}
+
+\item{ref_value}{Character. Value(s) in \code{ref_field} to use as reference
+when \code{reference = NULL}. Default is \code{c("QC", "Sample")}.}
+
+\item{by}{Character vector. Grouping variable(s) for grouped median normalization.
+Must be column name(s) in the ADAT. Use grouping when there are known changes
+in total protein load due to analysis conditions (e.g., disease vs. control,
+treatment vs. baseline). Normalization will be performed within each group
+separately. Default is \code{"SampleType"}.}
+
+\item{do_field}{Character. The field used to select samples for normalization
+(others keep original values). Default is \code{"SampleType"}.}
+
+\item{do_regexp}{Character. A regular expression pattern to select samples
+from \code{do_field} for normalization. Default is \code{"QC|Sample"}.}
+
+\item{verbose}{Logical. Should progress messages be printed? Default is \code{TRUE}.}
+}
+\value{
+A \code{soma_adat} object with median normalization applied and RFU values
+adjusted. The existing \verb{NormScale_*} columns are updated to include the
+effects of both plate scale normalization and median normalization.
+}
+\description{
+Performs median normalization on a \code{soma_adat} object that has
+already undergone hybridization normalization and contains plate scale
+normalization factors.
+
+Median normalization is a technique used to adjust data sets to remove
+certain assay artifacts and biases prior to analysis, particularly those that
+may arise due to differences in overall protein concentration, pipetting
+errors, reagent concentration changes, assay timing, and other systematic
+variabilities. Median normalization can improve assay precision and reduce
+technical variations that can mask true biological signals.
+
+This function works by calculating or using reference median values for each
+dilution group present in the ADAT. Dilution groups are determined from the
+analyte metadata and represent different sample dilutions used in the assay
+(e.g., 1:1, 1:200, 1:2000). For each sample, the median RFU across all
+analytes within a dilution group is calculated and compared to the reference
+median for that group. Scale factors are then computed and applied to adjust
+each sample's values toward the reference, ensuring consistent signal levels
+across samples within each dilution group. This function supports multiple
+reference approaches: calculating an internal reference from study samples,
+or using a calculated reference from a different ADAT or external reference
+file.
+}
+\section{Important Considerations}{
+
+\itemize{
+\item If there is a known change in total protein concentration due to analysis
+conditions, perform normalization \emph{within} groups using the \code{by} parameter
+\item In newer SomaScan assay versions, population references improve
+inter-plate consistency compared to within-plate references
+}
+}
+
+\examples{
+\dontrun{
+# Internal reference from study samples (default)
+# Use when you have representative QC or control samples in your study
+med_norm_adat <- medianNormalize(adat)
+
+# Reference from specific samples in the ADAT
+# Use when you want to normalize against only QC samples
+med_norm_adat <- medianNormalize(adat,
+                                 ref_field = "SampleType",
+                                 ref_value = "QC")
+
+# Reference from another ADAT
+# Use when you have a reference population or control study
+ref_adat <- read_adat("reference_file.adat")
+med_norm_adat <- medianNormalize(adat, reference = ref_adat)
+
+# External reference file
+# Use when you have pre-calculated reference medians for each dilution
+med_norm_adat <- medianNormalize(adat, reference = "reference_file.csv")
+
+# External reference as data.frame
+# Use for programmatic control over reference values
+ref_data <- data.frame(
+  Dilution = c("0_005", "0_5", "20"),
+  Reference = c(1500.2, 3200.8, 4100.5)
+)
+med_norm_adat <- medianNormalize(adat, reference = ref_data)
+
+# Custom grouping by multiple variables
+# Use when total protein load changes due to analysis conditions
+# (normalize within groups to account for expected biological differences)
+med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
+
+# Normalize only specific sample types
+# Use when you want to preserve original values for certain sample types
+med_norm_adat <- medianNormalize(adat,
+                                 do_field = "SampleType",
+                                 do_regexp = "Sample")
+}
+}
diff --git a/man/soma_adat.Rd b/man/soma_adat.Rd
index 26127fdc..d117ebd1 100644
--- a/man/soma_adat.Rd
+++ b/man/soma_adat.Rd
@@ -91,16 +91,18 @@ dispatched on class \code{soma_adat}.
 Below is a list of \emph{all} currently available S3 methods that dispatch on
 the \code{soma_adat} class:
 
-\if{html}{\out{<div class="sourceCode">}}\preformatted{#>  [1] [              [[             [[<-           [<-           
-#>  [5] ==             $              $<-            anti_join     
-#>  [9] arrange        count          filter         full_join     
-#> [13] getAdatVersion getAnalytes    getMeta        group_by      
-#> [17] inner_join     is_seqFormat   left_join      Math          
-#> [21] median         merge          mutate         Ops           
-#> [25] print          rename         right_join     row.names<-   
-#> [29] sample_frac    sample_n       select         semi_join     
-#> [33] separate       slice_sample   slice          summary       
-#> [37] Summary        transform      ungroup        unite         
+\if{html}{\out{<div class="sourceCode">}}\preformatted{#>  [1] [               [[              [[<-            [<-            
+#>  [5] ==              $               $<-             anti_join      
+#>  [9] arrange         centerScaleData convertAptNames count          
+#> [13] filter          full_join       getAdatVersion  getAnalytes    
+#> [17] getMeta         group_by        imputeNAs       inner_join     
+#> [21] is_seqFormat    is.logspace     left_join       Math           
+#> [25] median          merge           mutate          Ops            
+#> [29] print           rename          revertAptNames  right_join     
+#> [33] row.names<-     sample_frac     sample_n        select         
+#> [37] semi_join       separate        slice_sample    slice          
+#> [41] summary         Summary         transform       ungroup        
+#> [45] unite          
 #> see '?methods' for accessing help and source code
 }\if{html}{\out{</div>}}
 
diff --git a/tests/testthat/_snaps/medianNormalize.md b/tests/testthat/_snaps/medianNormalize.md
new file mode 100644
index 00000000..8ee3491c
--- /dev/null
+++ b/tests/testthat/_snaps/medianNormalize.md
@@ -0,0 +1,17 @@
+# `medianNormalize` produces expected verbose output
+
+    Code
+      result <- medianNormalize(test_data, verbose = TRUE)
+    Message
+      > Normalization scale factors already exist: "NormScale_20, NormScale_0_005, NormScale_0_5" - they will be replaced with new scale factors
+      > Building internal reference from field: "SampleType" with values: "QC" and "Sample"
+      > Performing grouped median normalization by: "SampleType" (2 groups)
+      > Processing group: "Sample" (12 samples)
+      v Processing dilution '0_005' with 173 analytes
+      v Processing dilution '0_5' with 828 analytes
+      v Processing dilution '20' with 4271 analytes
+      > Processing group: "QC" (4 samples)
+      v Processing dilution '0_005' with 173 analytes
+      v Processing dilution '0_5' with 828 analytes
+      v Processing dilution '20' with 4271 analytes
+
diff --git a/tests/testthat/test-medianNormalize.R b/tests/testthat/test-medianNormalize.R
new file mode 100644
index 00000000..6634c43f
--- /dev/null
+++ b/tests/testthat/test-medianNormalize.R
@@ -0,0 +1,363 @@
+# Setup ----
+# Create a minimal test dataset
+create_test_data <- function() {
+  data("example_data", package = "SomaDataIO")
+  # Select samples ensuring we have both Sample and QC types
+  sample_indices <- which(example_data$SampleType == "Sample")[1:12]
+  qc_indices <- which(example_data$SampleType == "QC")[1:4]
+
+  # Combine indices and subset
+  subset_indices <- c(sample_indices, qc_indices)
+
+  # Return subset with representative samples
+  example_data[subset_indices, ]
+}
+
+test_data <- create_test_data()
+
+# Testing ----
+test_that("`medianNormalize` Method 1: Internal reference (default)", {
+
+  # Test default internal reference method
+  expect_no_error(
+    result <- medianNormalize(test_data, verbose = FALSE)
+  )
+
+  # Check result structure
+  expect_true(is.soma_adat(result))
+  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
+  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+  expect_true(all(norm_cols %in% names(result)))
+
+  # Test scale factor output
+  expect_equal(result$NormScale_20[1:3], c(1.067845, 1.057102, 1.091748), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(1.022533, 1.001174, 1.012215), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(1.103112, 1.054358, 1.108711), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result$seq.10000.28[1:3], c(508.8, 501.5, 453.7))
+  expect_equal(result$seq.10008.43[1:3], c(599.9, 572.8, 462.8))
+
+  # Check header metadata
+  result_header <- attr(result, "Header.Meta")$HEADER
+  expect_true(grepl("medNormInt", result_header$ProcessSteps))
+  expect_equal(result_header$NormalizationAlgorithm, "MedNorm")
+  expect_equal(result_header$MedNormReference, "intraplate")
+})
+
+test_that("`medianNormalize` Method 2: Reference from specific samples", {
+
+  # Test with specific reference field and value
+  expect_no_error(
+    result <- medianNormalize(test_data,
+                              ref_field = "SampleType",
+                              ref_value = "QC",
+                              do_regexp = "QC",
+                              verbose = FALSE)
+  )
+
+  # Check result structure
+  expect_true(is.soma_adat(result))
+  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
+  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+
+  # Test scale factor output
+  expect_equal(result$NormScale_20[1:3], c(1.0369358, 0.9602251, 0.9841162), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.8570162, 0.8485842, 1.0327016), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.7771749, 0.8520195, 0.9151915), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 415.6))
+  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 423.9))
+
+  # Check header metadata
+  result_header <- attr(result, "Header.Meta")$HEADER
+  expect_equal(result_header$MedNormReference, "intraplate")
+})
+
+test_that("`medianNormalize` Method 3: Reference from another ADAT", {
+  # Create a minimal reference ADAT from sample subset
+  ref_adat <- test_data[1:4, ]  # Use first 4 samples as reference
+
+  expect_no_error(
+    result <- medianNormalize(test_data, reference = ref_adat, verbose = FALSE)
+  )
+
+  # Check result structure
+  expect_true(is.soma_adat(result))
+  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
+  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+
+  # Test scale factor output
+  expect_equal(result$NormScale_20[1:3], c(0.9907154, 1.0109152, 0.9931271), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(1.005733, 1.008454, 0.986035), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(1.0090878, 0.9909594, 1.0002446), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result$seq.10000.28[1:3], c(472.1, 479.6, 412.7))
+  expect_equal(result$seq.10008.43[1:3], c(556.6, 547.8, 421.0))
+
+  # Check header metadata
+  result_header <- attr(result, "Header.Meta")$HEADER
+  expect_equal(result_header$MedNormReference, "external_adat")
+})
+
+test_that("`medianNormalize` Method 4: External reference file", {
+  # Create a temporary CSV file with reference data
+  # Use realistic dilution groups from example_data: "20", "0.5", "0.005", "0"
+  ref_data <- data.frame(
+    Dilution = c("20", "0_5", "0_005", "0"),
+    Reference = c(2500.5, 1800.2, 3200.8, 1500.0),
+    stringsAsFactors = FALSE
+  )
+
+  # Create temporary CSV file
+  temp_csv <- tempfile(fileext = ".csv")
+  write.csv(ref_data, temp_csv, row.names = FALSE)
+
+  expect_no_error(
+    result <- medianNormalize(test_data, reference = temp_csv, verbose = FALSE)
+  )
+
+  # Check result structure
+  expect_true(is.soma_adat(result))
+  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
+  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+
+  # Test scale factor output
+  expect_equal(result$NormScale_20[1:3], c(2.937272, 2.982111, 2.936927), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.6313838, 0.5966632, 0.6624721), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.7147623, 0.7780614, 0.7704032), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result$seq.10000.28[1:3], c(1399.6, 1414.7, 1220.6))
+  expect_equal(result$seq.10008.43[1:3], c(1650.2, 1616.0, 1245.0))
+
+  # Check header metadata
+  result_header <- attr(result, "Header.Meta")$HEADER
+  expect_equal(result_header$MedNormReference, basename(temp_csv))
+
+  # Clean up
+  unlink(temp_csv)
+
+  # Test with TSV file
+  temp_tsv <- tempfile(fileext = ".tsv")
+  write.table(ref_data, temp_tsv, sep = "\t", row.names = FALSE)
+
+  expect_no_error(
+    result2 <- medianNormalize(test_data, reference = temp_tsv, verbose = FALSE)
+  )
+
+  expect_true(is.soma_adat(result2))
+
+  # Test scale factor output
+  expect_equal(result2$NormScale_20[1:3], c(2.937272, 2.982111, 2.936927), tolerance = 0.0001)
+  expect_equal(result2$NormScale_0_005[1:3], c(0.6313838, 0.5966632, 0.6624721), tolerance = 0.0001)
+  expect_equal(result2$NormScale_0_5[1:3], c(0.7147623, 0.7780614, 0.7704032), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result2$seq.10000.28[1:3], c(1399.6, 1414.7, 1220.6))
+  expect_equal(result2$seq.10008.43[1:3], c(1650.2, 1616.0, 1245.0))
+
+  # Clean up
+  unlink(temp_tsv)
+})
+
+test_that("`medianNormalize` Method 5: External reference as data.frame", {
+
+  # Create reference data.frame with realistic dilution groups
+  ref_data <- data.frame(
+    Dilution = c("20", "0_5", "0_005", "0"),
+    Reference = c(2500.5, 1800.2, 3200.8, 1500.0),
+    stringsAsFactors = FALSE
+  )
+
+  expect_no_error(
+    result <- medianNormalize(test_data, reference = ref_data, verbose = FALSE)
+  )
+
+  # Check result structure
+  expect_true(is.soma_adat(result))
+  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
+  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+
+  # Test scale factor output
+  expect_equal(result$NormScale_20[1:3], c(2.937272, 2.982111, 2.936927), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.6313838, 0.5966632, 0.6624721), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.7147623, 0.7780614, 0.7704032), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result$seq.10000.28[1:3], c(1399.6, 1414.7, 1220.6))
+  expect_equal(result$seq.10008.43[1:3], c(1650.2, 1616.0, 1245.0))
+
+  # Check header metadata
+  result_header <- attr(result, "Header.Meta")$HEADER
+  expect_equal(result_header$MedNormReference, "external_data")
+})
+
+test_that("`medianNormalize` validates input requirements", {
+
+  # Test with non-soma_adat object
+  expect_error(
+    medianNormalize(data.frame(a = 1:3, b = 4:6)),
+    "`adat` must be a class `soma_adat` object"
+  )
+
+  # Test with missing hybridization normalization
+  test_data_no_hyb <- test_data
+  header_meta <- list(HEADER = list(ProcessSteps = "SomeOtherStep"))
+  attr(test_data_no_hyb, "Header.Meta") <- header_meta
+
+  expect_error(
+    medianNormalize(test_data_no_hyb),
+    "Hybrid normalization step not detected"
+  )
+
+  # Test with missing normalization scale factors
+  test_data_no_norm <- test_data |>
+    dplyr::select(-c(NormScale_0_005, NormScale_0_5, NormScale_20))
+  header_meta <- list(HEADER = list(ProcessSteps = "Hyb Normalization"))
+  attr(test_data_no_norm, "Header.Meta") <- header_meta
+
+  expect_error(
+    medianNormalize(test_data_no_norm),
+    "No normalization scale factor columns found"
+  )
+
+  # Test with invalid reference field
+  expect_error(
+    medianNormalize(test_data, ref_field = "NonExistentField", verbose = FALSE),
+    "Reference field `NonExistentField` not found"
+  )
+
+  # Test with invalid reference data.frame (missing required columns)
+  invalid_ref <- data.frame(Wrong = c("20", "0_5"), Column = c(1000, 2000))
+  expect_error(
+    medianNormalize(test_data, reference = invalid_ref, verbose = FALSE),
+    "Reference data must contain columns: Dilution, Reference"
+  )
+
+  # Test with non-existent reference file
+  expect_error(
+    medianNormalize(test_data, reference = "non_existent_file.csv", verbose = FALSE),
+    "Reference file not found"
+  )
+
+  # Test with invalid reference type
+  expect_error(
+    medianNormalize(test_data, reference = 123, verbose = FALSE),
+    "Invalid reference type"
+  )
+})
+
+test_that("`medianNormalize` external reference file validation", {
+  # Test with missing dilution groups in reference
+  incomplete_ref <- data.frame(
+    Dilution = c("20", "0_5"),  # Missing "0_005" and "0"
+    Reference = c(2500, 1800),
+    stringsAsFactors = FALSE
+  )
+
+  expect_error(
+    medianNormalize(test_data, reference = incomplete_ref, verbose = FALSE),
+    "Missing reference values for dilution groups"
+  )
+})
+
+test_that("`medianNormalize` handles grouping correctly", {
+  # Test grouping by Sex
+  expect_no_error(
+    result1 <- medianNormalize(test_data, by = "Sex", do_regexp = "Sample", verbose = FALSE)
+  )
+
+  # Check result structure
+  expect_true(is.soma_adat(result1))
+  norm_cols <- grep("^NormScale_", names(result1), value = TRUE)
+  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+
+  # Test scale factor output
+  expect_equal(result1$NormScale_20[1:3], c(1.066964, 1.060173, 1.020341), tolerance = 0.0001)
+  expect_equal(result1$NormScale_0_005[1:3], c(1.000455, 1.005372, 1.000000), tolerance = 0.0001)
+  expect_equal(result1$NormScale_0_5[1:3], c(1.115562, 1.050018, 1.030397), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result1$seq.10000.28[1:3], c(508.4, 502.9, 424.1))
+  expect_equal(result1$seq.10008.43[1:3], c(599.4, 574.5, 432.5))
+
+  # Test grouping by multiple variables
+  if ("PlateId" %in% names(test_data)) {
+    expect_no_error(
+      result2 <- medianNormalize(test_data, by = c("PlateId", "SampleType"), verbose = FALSE)
+    )
+
+    # Check result structure
+    expect_true(is.soma_adat(result2))
+    norm_cols <- grep("^NormScale_", names(result2), value = TRUE)
+    expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+
+    # Test scale factor output
+    expect_equal(result2$NormScale_20[1:3], c(1.053656, 1.059554, 1.068602), tolerance = 0.0001)
+    expect_equal(result2$NormScale_0_005[1:3], c(1.0124749, 1.0053543, 0.9992799), tolerance = 0.0001)
+    expect_equal(result2$NormScale_0_5[1:3], c(1.080347, 1.056874, 1.074292), tolerance = 0.0001)
+
+    # Test specific SeqId columns
+    expect_equal(result2$seq.10000.28[1:3], c(502.1, 502.7, 444.1))
+    expect_equal(result2$seq.10008.43[1:3], c(591.9, 574.2, 453.0))
+  }
+
+  # Test error with non-existent grouping column
+  expect_error(
+    medianNormalize(test_data, by = "NonExistentColumn", verbose = FALSE),
+    "Grouping column\\(s\\) not found"
+  )
+})
+
+test_that("`medianNormalize` handles sample selection correctly", {
+  # Test selective normalization
+  expect_no_error(
+    result <- medianNormalize(test_data,
+                              do_field = "SampleType",
+                              do_regexp = "QC",
+                              verbose = FALSE)
+  )
+  expect_true(is.soma_adat(result))
+
+  # Check result structure
+  expect_true(is.soma_adat(result))
+  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
+  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
+
+  # Test scale factor output
+  expect_equal(result$NormScale_20[1:3], c(1.0369358, 0.9602251, 0.9841162), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.8570162, 0.8485842, 1.0327016), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.7771749, 0.8520195, 0.9151915), tolerance = 0.0001)
+
+  # Test specific SeqId columns
+  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 415.6))
+  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 423.9))
+
+  # Test with non-existent field
+  expect_error(
+    medianNormalize(test_data, do_field = "NonExistentField", verbose = FALSE),
+    "Field `NonExistentField` not found"
+  )
+
+  # Test with pattern that matches no samples
+  expect_error(
+    medianNormalize(test_data, do_regexp = "NoMatchPattern", verbose = FALSE),
+    "No samples selected for normalization"
+  )
+})
+
+test_that("`medianNormalize` produces expected verbose output", {
+  # specific verbose output messages
+  expect_snapshot(
+    result <- medianNormalize(test_data, verbose = TRUE)
+  )
+
+  # no output
+  expect_message(
+    result <- medianNormalize(test_data, verbose = FALSE),
+    NA  # NA indicates no output expected
+  )
+})

From 49ef92cd2c9dd1f157e61f6115ae6fb7c81d74f6 Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Tue, 13 Jan 2026 16:42:17 -0500
Subject: [PATCH 02/10] Enhance medianNormalize() with comprehensive validation

- updated documentation to provide guidance on what type of
  data the function can be applied to
- added data validation for hybridization normalization, plate
  scale factors, and existing processing
- implemented `reverse_existing` parameter to handle
  already-normalized data
- added dilution count validation, accepting 1 or 3, along with
  enhanced error messaging
- now recalculates RowCheck to adjust for new MedNorm values
- now adds `medNormSMP_ReferenceRFU` field to SeqId annotation to
  denote the median mormalization reference
- now updates the header metadata to add `crossplate` entry to
  `MedNormReference` field
---
 R/medianNormalize.R                      | 348 ++++++++++++++++++++++-
 inst/WORDLIST                            |  10 +-
 man/medianNormalize.Rd                   |  41 +++
 man/soma_adat.Rd                         |  22 +-
 tests/testthat/_snaps/medianNormalize.md |  13 +
 tests/testthat/test-medianNormalize.R    |  24 +-
 6 files changed, 426 insertions(+), 32 deletions(-)

diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index ba24e334..4b44d55e 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -23,12 +23,38 @@
 #' or using a calculated reference from a different ADAT or external reference
 #' file.
 #'
+#' @section Data Requirements:
+#' This function is designed for data in standard SomaLogic deliverable formats:
+#' \enumerate{
+#'   \item \strong{Study samples not median normalized}, with all controls median normalized.
+#'         Calibrators and buffers are intra-plate median normalized and QC samples are ANML
+#'         normalized if present on standard 3-dilution setups. QC assay controls undergo
+#'         full ANML data standardization even when study samples don't, in order to QC
+#'         the plate. Cell & tissue single dilution runs don't have QC samples.
+#'   \item \strong{Same as above}, but study samples undergo either ANML or intra-study
+#'         standard median normalization.
+#' }
+#'
+#' All data should be hybridization normalized, median normalized internally
+#' (buffer + calibrator), and plate scaled regardless of matrix prior to applying
+#' median normalization. Calibration is not applied to all study types.
+#'
+#' \strong{Primary use cases:}
+#' \itemize{
+#'   \item 3-dilution setups (standard SomaScan)
+#'   \item Single dilution cell & tissue studies
+#' }
+#'
 #' @section Important Considerations:
 #' \itemize{
 #'   \item If there is a known change in total protein concentration due to analysis
 #'     conditions, perform normalization _within_ groups using the `by` parameter
 #'   \item In newer SomaScan assay versions, population references improve
 #'     inter-plate consistency compared to within-plate references
+#'   \item The function will not proceed on already ANML or median normalized data
+#'     unless `reverse_existing = TRUE` is specified
+#'   \item When reversing existing normalization, only study samples are reversed;
+#'     QC, Calibrator, and Buffer samples retain their normalization
 #' }
 #'
 #' @param adat A `soma_adat` object created using [read_adat()], containing
@@ -65,6 +91,11 @@
 #'   (others keep original values). Default is `"SampleType"`.
 #' @param do_regexp Character. A regular expression pattern to select samples
 #'   from `do_field` for normalization. Default is `"QC|Sample"`.
+#' @param reverse_existing Logical. Should existing median normalization be
+#'   reversed before applying new normalization? When `TRUE`, existing median
+#'   normalization scale factors are reversed for study samples only (QC,
+#'   Calibrator, and Buffer samples retain their normalization). This allows
+#'   re-normalization of data that has already been median normalized. Default is `FALSE`.
 #' @param verbose Logical. Should progress messages be printed? Default is `TRUE`.
 #' @return A `soma_adat` object with median normalization applied and RFU values
 #'   adjusted. The existing `NormScale_*` columns are updated to include the
@@ -108,6 +139,12 @@
 #' med_norm_adat <- medianNormalize(adat,
 #'                                  do_field = "SampleType",
 #'                                  do_regexp = "Sample")
+#'
+#' # Re-normalize data that has already been median normalized
+#' # Use when you want to apply different normalization to previously normalized data
+#' med_norm_adat <- medianNormalize(adat,
+#'                                  reverse_existing = TRUE,
+#'                                  reference = new_reference)
 #' }
 #' @importFrom dplyr filter
 #' @importFrom stats median
@@ -120,6 +157,7 @@ medianNormalize <- function(adat,
                             by = "SampleType",
                             do_field = "SampleType",
                             do_regexp = "QC|Sample",
+                            reverse_existing = FALSE,
                             verbose = TRUE) {
 
   # Input validation ----
@@ -152,6 +190,17 @@ medianNormalize <- function(adat,
     )
   }
 
+  # Check data state and existing normalization ----
+  has_existing_norm <- .validateDataState(adat, header, verbose, reverse_existing)
+
+  # Reverse existing normalization if requested ----
+  if (reverse_existing && has_existing_norm) {
+    if (verbose) {
+      cat("Reversing existing median normalization for study samples...\n")
+    }
+    adat <- .reverseMedNormSMP(adat, verbose)
+  }
+
   # Create dilution groups ----
   apt_data <- getAnalyteInfo(adat)
 
@@ -169,9 +218,8 @@ medianNormalize <- function(adat,
   names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
   names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
 
-  if (length(dil_groups) < 2L) {
-    warning("Fewer than 2 dilution groups detected", call. = FALSE)
-  }
+  # Validate dilution count ----
+  .validateDilutionCount(dil_groups, verbose)
 
   # Check for existing normalization scale factors ----
   existing_norm_sf <- grep("^NormScale_", names(adat), value = TRUE)
@@ -274,6 +322,12 @@ medianNormalize <- function(adat,
   norm_adat <- norm_adat[order(norm_adat$.rowid), ]
   norm_adat$.rowid <- NULL
 
+  # Add medNorm reference to SeqId annotations ----
+  norm_adat <- .addMedNormReference(norm_adat, ref_data, dil_groups)
+
+  # Recalculate RowCheck to adjust for new MedNorm values ----
+  norm_adat <- .recalculateRowCheck(norm_adat, verbose)
+
   # Update header metadata
   .updateHeaderMetadata(norm_adat, reference)
 }
@@ -649,15 +703,21 @@ medianNormalize <- function(adat,
     # Set normalization algorithm
     header_meta$HEADER$NormalizationAlgorithm <- "MedNorm"
 
-    # Set reference type
+    # Set reference type - add "crossplate" as new comma-separated entry
     if (is.null(reference)) {
-      header_meta$HEADER$MedNormReference <- "intraplate"
+      # For internal references, use crossplate
+      current_ref <- header_meta$HEADER$MedNormReference %||% ""
+      if (current_ref == "") {
+        header_meta$HEADER$MedNormReference <- "intraplate, crossplate"
+      } else if (!grepl("crossplate", current_ref)) {
+        header_meta$HEADER$MedNormReference <- paste(current_ref, "crossplate", sep = ", ")
+      }
     } else if (is.soma_adat(reference)) {
-      header_meta$HEADER$MedNormReference <- "external_adat"
+      header_meta$HEADER$MedNormReference <- "external_adat, crossplate"
     } else if (is.character(reference)) {
-      header_meta$HEADER$MedNormReference <- basename(reference)
+      header_meta$HEADER$MedNormReference <- paste(basename(reference), "crossplate", sep = ", ")
     } else {
-      header_meta$HEADER$MedNormReference <- "external_data"
+      header_meta$HEADER$MedNormReference <- "external_data, crossplate"
     }
 
     attr(adat, "Header.Meta") <- header_meta
@@ -665,3 +725,275 @@ medianNormalize <- function(adat,
 
   adat
 }
+
+
+#' Validate Data State for Median Normalization
+#' @noRd
+.validateDataState <- function(adat, header, verbose, reverse_existing = FALSE) {
+
+  # Check if data is in a standard deliverable state
+  process_steps <- header$ProcessSteps %||% ""
+
+  # Check for existing median normalization
+  has_mednorm <- grepl("medNormInt|MedNorm", process_steps, ignore.case = TRUE)
+  has_anml <- grepl("ANML", process_steps, ignore.case = TRUE)
+
+  if (has_mednorm && !reverse_existing) {
+    stop(
+      "Data appears to already be median normalized. ",
+      "Set reverse_existing = TRUE to reverse existing median normalization before applying new normalization. ",
+      "ProcessSteps: ", process_steps,
+      call. = FALSE
+    )
+  }
+
+  if (has_anml) {
+    stop(
+      "Data appears to be ANML normalized. ",
+      "ANML normalized data cannot be directly processed with this function. ",
+      "ProcessSteps: ", process_steps,
+      call. = FALSE
+    )
+  }
+
+  # Check for required normalization steps
+  has_hyb <- grepl("Hyb|hybridization", process_steps, ignore.case = TRUE)
+  has_plate_scale <- grepl("PlateScale|plate.?scale", process_steps, ignore.case = TRUE)
+
+  # Warn if not in standard deliverable state
+  if (!has_hyb || !has_plate_scale) {
+    warning(
+      "Data may not be in standard deliverable format. ",
+      "Standard format requires hybridization normalization, median normalization of controls ",
+      "(buffer + calibrator), and plate scale normalization before applying median normalization. ",
+      "Current ProcessSteps: ", process_steps,
+      call. = FALSE
+    )
+  }
+
+  if (verbose) {
+    cat("Data validation passed for median normalization.\n")
+    cat("Standard deliverable checks:\n")
+    cat("  - Hybridization normalization:", if(has_hyb) "PASS" else "WARN", "\n")
+    cat("  - Plate scale normalization:", if(has_plate_scale) "PASS" else "WARN", "\n")
+    cat("  - No existing MedNorm/ANML:", if(!has_mednorm && !has_anml) "PASS" else if(reverse_existing && has_mednorm) "WARN (will reverse)" else "FAIL", "\n")
+  }
+
+  return(has_mednorm || has_anml)
+}
+
+
+#' Validate Dilution Count
+#' @noRd
+.validateDilutionCount <- function(dil_groups, verbose) {
+
+  num_dilutions <- length(dil_groups)
+
+  # Primary use cases are 1 or 3 dilutions
+  if (!num_dilutions %in% c(1, 3)) {
+    warning(
+      "Non-standard dilution count detected: ", num_dilutions, " dilutions. ",
+      "Primary use cases are 1 dilution (cell & tissue studies) or 3 dilutions (standard setups). ",
+      "Found dilutions: ", paste(names(dil_groups), collapse = ", "),
+      call. = FALSE
+    )
+  }
+
+  if (verbose) {
+    if (num_dilutions == 1) {
+      cat("Single dilution setup detected (typical for cell & tissue studies).\n")
+    } else if (num_dilutions == 3) {
+      cat("Three dilution setup detected (standard setup).\n")
+    } else {
+      cat("Non-standard dilution setup detected (", num_dilutions, " dilutions).\n")
+    }
+  }
+
+  invisible(NULL)
+}
+
+
+#' Add MedNorm Reference to SeqId Annotations
+#' @noRd
+.addMedNormReference <- function(adat, ref_data, dil_groups) {
+
+  # Get analyte info
+  apt_data <- getAnalyteInfo(adat)
+
+  # Initialize the column if it doesn't exist
+  if (!"medNormSMP_ReferenceRFU" %in% names(apt_data)) {
+    apt_data$medNormSMP_ReferenceRFU <- NA_real_
+  }
+
+  # For each analyte, add medNormSMP_ReferenceRFU
+  if (!is.null(ref_data)) {
+    for (dil_name in names(dil_groups)) {
+      if (dil_name %in% names(ref_data)) {
+        dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
+
+        if (length(dil_apts) > 0) {
+          ref_values <- ref_data[[dil_name]]
+
+          # Handle both single reference and aptamer-specific references
+          if (is.numeric(ref_values) && length(ref_values) == 1) {
+            # Single reference value for the whole dilution group
+            apt_data$medNormSMP_ReferenceRFU[apt_data$AptName %in% dil_apts] <- ref_values
+          } else if (is.numeric(ref_values) && length(ref_values) > 1) {
+            # Aptamer-specific reference values
+            for (apt in dil_apts) {
+              if (apt %in% names(ref_values)) {
+                apt_data$medNormSMP_ReferenceRFU[apt_data$AptName == apt] <- ref_values[apt]
+              }
+            }
+          }
+        }
+      }
+    }
+
+    # Update the analyte metadata
+    attr(adat, "Col.Meta") <- apt_data
+  }
+
+  invisible(adat)
+}
+
+
+#' Reverse Existing Median Normalization for Study Samples
+#' @noRd
+.reverseMedNormSMP <- function(adat, verbose) {
+
+  # Get existing scale factors
+  sf_cols <- grep("^NormScale_", names(adat), value = TRUE)
+
+  if (length(sf_cols) == 0) {
+    if (verbose) {
+      cat("No existing median normalization scale factors found to reverse.\n")
+    }
+    return(adat)
+  }
+
+  if (verbose) {
+    cat("Reversing existing median normalization for study samples...\n")
+  }
+
+  # Get dilution groups
+  apt_data <- getAnalyteInfo(adat)
+  dil_groups <- split(apt_data$AptName, apt_data$Dilution)
+  names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
+  names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
+
+  # Only reverse for study samples, leave QC/Calibrator/Buffer alone
+  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
+
+  if (sum(sample_mask) == 0) {
+    if (verbose) {
+      cat("No study samples found to reverse normalization.\n")
+    }
+    return(adat)
+  }
+
+  # For each dilution group, reverse normalization
+  for (dil_name in names(dil_groups)) {
+    sf_col <- paste0("NormScale_", dil_name)
+
+    if (sf_col %in% names(adat)) {
+      dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
+
+      if (length(dil_apts) > 0) {
+        for (i in which(sample_mask)) {
+          scale_factor <- adat[[sf_col]][i]
+          if (!is.na(scale_factor) && scale_factor != 0) {
+            # Apply inverse of scale factor
+            adat[i, dil_apts] <- adat[i, dil_apts] / scale_factor
+            # Reset scale factor to 1.0
+            adat[[sf_col]][i] <- 1.0
+          }
+        }
+      }
+    }
+  }
+
+  # Update ProcessSteps to indicate reversal
+  header_meta <- attr(adat, "Header.Meta")
+  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
+    current_steps <- header_meta$HEADER$ProcessSteps %||% ""
+    header_meta$HEADER$ProcessSteps <- paste(current_steps, "reverseMedNormSMP", sep = ", ")
+    attr(adat, "Header.Meta") <- header_meta
+  }
+
+  if (verbose) {
+    cat("Median normalization reversed for", sum(sample_mask), "study samples.\n")
+  }
+
+  adat
+}
+
+
+#' Recalculate RowCheck after Median Normalization
+#'
+#' Recalculates RowCheck values as PASS or FLAG based on normalization acceptance
+#' criteria for row scale factors after median normalization. Samples with all
+#' row scale factors within the acceptance range (0.4 to 2.5) receive "PASS",
+#' while samples with any scale factor outside this range receive "FLAG".
+#'
+#' @param adat A `soma_adat` object after median normalization
+#' @param verbose Logical. Whether to print progress messages
+#' @return The `soma_adat` object with updated RowCheck values
+#' @noRd
+.recalculateRowCheck <- function(adat, verbose) {
+
+  if (verbose) {
+    cat("Recalculating RowCheck values based on normalization acceptance criteria...\n")
+  }
+
+  # Check if RowCheck column exists
+  if (!"RowCheck" %in% names(adat)) {
+    if (verbose) {
+      cat("No RowCheck column found to recalculate.\n")
+    }
+    return(adat)
+  }
+
+  # Find all normalization scale factor columns (NormScale_*)
+  scale_factor_cols <- grep("^NormScale_", names(adat), value = TRUE)
+
+  if (length(scale_factor_cols) == 0) {
+    if (verbose) {
+      cat("No normalization scale factor columns found. Setting all RowCheck to PASS.\n")
+    }
+    adat$RowCheck <- "PASS"
+    return(adat)
+  }
+
+  # Define acceptance criteria range for row scale factors
+  min_scale <- 0.4
+  max_scale <- 2.5
+
+  # Calculate RowCheck for each sample
+  for (i in seq_len(nrow(adat))) {
+    # Get all scale factor values for this sample
+    scale_values <- as.numeric(adat[i, scale_factor_cols, drop = FALSE])
+    scale_values <- scale_values[!is.na(scale_values)]
+
+    # Check if all scale factors are within acceptance range
+    if (length(scale_values) == 0) {
+      # No scale factors available - default to PASS
+      adat$RowCheck[i] <- "PASS"
+    } else if (all(scale_values >= min_scale & scale_values <= max_scale)) {
+      adat$RowCheck[i] <- "PASS"
+    } else {
+      adat$RowCheck[i] <- "FLAG"
+    }
+  }
+
+  if (verbose) {
+    pass_count <- sum(adat$RowCheck == "PASS", na.rm = TRUE)
+    flag_count <- sum(adat$RowCheck == "FLAG", na.rm = TRUE)
+    cat("RowCheck values updated for", nrow(adat), "samples.\n")
+    cat("  - PASS:", pass_count, "samples\n")
+    cat("  - FLAG:", flag_count, "samples\n")
+    cat("  - Acceptance criteria: scale factors within [", min_scale, ", ", max_scale, "]\n")
+  }
+
+  adat
+}
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 4c2a38cc..8bd5756d 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -5,6 +5,7 @@ Agilent
 Analyte
 Analytes
 AptName
+AptNames
 AssayNotes
 Barcode
 BioTools
@@ -36,9 +37,10 @@ HybControlNormScale
 Kuei
 LF
 Lifecycle
+MADs
 MERCHANTABILITY
 MacOS
-MADs
+MedNorm
 NHS
 NormScale
 ORCID
@@ -55,7 +57,6 @@ RFU
 RFUs
 RUO
 ReferenceRFU
-Reproducibility
 RowCheck
 SELEX
 SG
@@ -87,8 +88,8 @@ TimePoint
 TubeUniqueID
 Un
 UniProt
-YAML
 adat
+adats
 aliquot
 analyte
 analytes
@@ -112,7 +113,6 @@ magrittr
 md
 medNormRef
 nd
-normals
 pipetting
 pkgdown
 plex
@@ -128,6 +128,4 @@ tada
 th
 tibble
 tidyr
-untransformed
 variabilities
-vectorized
diff --git a/man/medianNormalize.Rd b/man/medianNormalize.Rd
index 00a22ad8..d8ac0a30 100644
--- a/man/medianNormalize.Rd
+++ b/man/medianNormalize.Rd
@@ -12,6 +12,7 @@ medianNormalize(
   by = "SampleType",
   do_field = "SampleType",
   do_regexp = "QC|Sample",
+  reverse_existing = FALSE,
   verbose = TRUE
 )
 }
@@ -57,6 +58,12 @@ separately. Default is \code{"SampleType"}.}
 \item{do_regexp}{Character. A regular expression pattern to select samples
 from \code{do_field} for normalization. Default is \code{"QC|Sample"}.}
 
+\item{reverse_existing}{Logical. Should existing median normalization be
+reversed before applying new normalization? When \code{TRUE}, existing median
+normalization scale factors are reversed for study samples only (QC,
+Calibrator, and Buffer samples retain their normalization). This allows
+re-normalization of data that has already been median normalized. Default is \code{FALSE}.}
+
 \item{verbose}{Logical. Should progress messages be printed? Default is \code{TRUE}.}
 }
 \value{
@@ -88,6 +95,30 @@ reference approaches: calculating an internal reference from study samples,
 or using a calculated reference from a different ADAT or external reference
 file.
 }
+\section{Data Requirements}{
+
+This function is designed for data in standard SomaLogic deliverable formats:
+\enumerate{
+\item \strong{Study samples not median normalized}, with all controls median normalized.
+Calibrators and buffers are intra-plate median normalized and QC samples are ANML
+normalized if present on standard 3-dilution setups. QC assay controls undergo
+full ANML data standardization even when study samples don't, in order to QC
+the plate. Cell & tissue single dilution runs don't have QC samples.
+\item \strong{Same as above}, but study samples undergo either ANML or intra-study
+standard median normalization.
+}
+
+All data should be hybridization normalized, median normalized internally
+(buffer + calibrator), and plate scaled regardless of matrix prior to applying
+median normalization. Calibration is not applied to all study types.
+
+\strong{Primary use cases:}
+\itemize{
+\item 3-dilution setups (standard SomaScan)
+\item Single dilution cell & tissue studies
+}
+}
+
 \section{Important Considerations}{
 
 \itemize{
@@ -95,6 +126,10 @@ file.
 conditions, perform normalization \emph{within} groups using the \code{by} parameter
 \item In newer SomaScan assay versions, population references improve
 inter-plate consistency compared to within-plate references
+\item The function will not proceed on already ANML or median normalized data
+unless \code{reverse_existing = TRUE} is specified
+\item When reversing existing normalization, only study samples are reversed;
+QC, Calibrator, and Buffer samples retain their normalization
 }
 }
 
@@ -137,5 +172,11 @@ med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
 med_norm_adat <- medianNormalize(adat,
                                  do_field = "SampleType",
                                  do_regexp = "Sample")
+
+# Re-normalize data that has already been median normalized
+# Use when you want to apply different normalization to previously normalized data
+med_norm_adat <- medianNormalize(adat,
+                                 reverse_existing = TRUE,
+                                 reference = new_reference)
 }
 }
diff --git a/man/soma_adat.Rd b/man/soma_adat.Rd
index d117ebd1..26127fdc 100644
--- a/man/soma_adat.Rd
+++ b/man/soma_adat.Rd
@@ -91,18 +91,16 @@ dispatched on class \code{soma_adat}.
 Below is a list of \emph{all} currently available S3 methods that dispatch on
 the \code{soma_adat} class:
 
-\if{html}{\out{<div class="sourceCode">}}\preformatted{#>  [1] [               [[              [[<-            [<-            
-#>  [5] ==              $               $<-             anti_join      
-#>  [9] arrange         centerScaleData convertAptNames count          
-#> [13] filter          full_join       getAdatVersion  getAnalytes    
-#> [17] getMeta         group_by        imputeNAs       inner_join     
-#> [21] is_seqFormat    is.logspace     left_join       Math           
-#> [25] median          merge           mutate          Ops            
-#> [29] print           rename          revertAptNames  right_join     
-#> [33] row.names<-     sample_frac     sample_n        select         
-#> [37] semi_join       separate        slice_sample    slice          
-#> [41] summary         Summary         transform       ungroup        
-#> [45] unite          
+\if{html}{\out{<div class="sourceCode">}}\preformatted{#>  [1] [              [[             [[<-           [<-           
+#>  [5] ==             $              $<-            anti_join     
+#>  [9] arrange        count          filter         full_join     
+#> [13] getAdatVersion getAnalytes    getMeta        group_by      
+#> [17] inner_join     is_seqFormat   left_join      Math          
+#> [21] median         merge          mutate         Ops           
+#> [25] print          rename         right_join     row.names<-   
+#> [29] sample_frac    sample_n       select         semi_join     
+#> [33] separate       slice_sample   slice          summary       
+#> [37] Summary        transform      ungroup        unite         
 #> see '?methods' for accessing help and source code
 }\if{html}{\out{</div>}}
 
diff --git a/tests/testthat/_snaps/medianNormalize.md b/tests/testthat/_snaps/medianNormalize.md
index 8ee3491c..8a528fe1 100644
--- a/tests/testthat/_snaps/medianNormalize.md
+++ b/tests/testthat/_snaps/medianNormalize.md
@@ -2,6 +2,13 @@
 
     Code
       result <- medianNormalize(test_data, verbose = TRUE)
+    Output
+      Data validation passed for median normalization.
+      Standard deliverable checks:
+        - Hybridization normalization: PASS 
+        - Plate scale normalization: PASS 
+        - No existing MedNorm/ANML: PASS 
+      Three dilution setup detected (standard setup).
     Message
       > Normalization scale factors already exist: "NormScale_20, NormScale_0_005, NormScale_0_5" - they will be replaced with new scale factors
       > Building internal reference from field: "SampleType" with values: "QC" and "Sample"
@@ -14,4 +21,10 @@
       v Processing dilution '0_005' with 173 analytes
       v Processing dilution '0_5' with 828 analytes
       v Processing dilution '20' with 4271 analytes
+    Output
+      Recalculating RowCheck values based on normalization acceptance criteria...
+      RowCheck values updated for 16 samples.
+        - PASS: 16 samples
+        - FLAG: 0 samples
+        - Acceptance criteria: scale factors within [ 0.4 ,  2.5 ]
 
diff --git a/tests/testthat/test-medianNormalize.R b/tests/testthat/test-medianNormalize.R
index 6634c43f..fb7d4b7d 100644
--- a/tests/testthat/test-medianNormalize.R
+++ b/tests/testthat/test-medianNormalize.R
@@ -8,9 +8,18 @@ create_test_data <- function() {
 
   # Combine indices and subset
   subset_indices <- c(sample_indices, qc_indices)
+  test_adat <- example_data[subset_indices, ]
+
+  # Modify header to simulate pre-ANML state (remove ANML steps)
+  header_meta <- attr(test_adat, "Header.Meta")
+  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
+    # Remove ANML and median normalization steps to simulate pre-processed state
+    header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, plateScale, Calibration"
+    attr(test_adat, "Header.Meta") <- header_meta
+  }
 
   # Return subset with representative samples
-  example_data[subset_indices, ]
+  test_adat
 }
 
 test_data <- create_test_data()
@@ -42,7 +51,7 @@ test_that("`medianNormalize` Method 1: Internal reference (default)", {
   result_header <- attr(result, "Header.Meta")$HEADER
   expect_true(grepl("medNormInt", result_header$ProcessSteps))
   expect_equal(result_header$NormalizationAlgorithm, "MedNorm")
-  expect_equal(result_header$MedNormReference, "intraplate")
+  expect_true(grepl("intraplate, crossplate", result_header$MedNormReference))
 })
 
 test_that("`medianNormalize` Method 2: Reference from specific samples", {
@@ -72,7 +81,7 @@ test_that("`medianNormalize` Method 2: Reference from specific samples", {
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
-  expect_equal(result_header$MedNormReference, "intraplate")
+  expect_true(grepl("intraplate, crossplate", result_header$MedNormReference))
 })
 
 test_that("`medianNormalize` Method 3: Reference from another ADAT", {
@@ -99,7 +108,8 @@ test_that("`medianNormalize` Method 3: Reference from another ADAT", {
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
-  expect_equal(result_header$MedNormReference, "external_adat")
+  expect_true(grepl("external_adat", result_header$MedNormReference))
+  expect_true(grepl("crossplate", result_header$MedNormReference))
 })
 
 test_that("`medianNormalize` Method 4: External reference file", {
@@ -135,7 +145,8 @@ test_that("`medianNormalize` Method 4: External reference file", {
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
-  expect_equal(result_header$MedNormReference, basename(temp_csv))
+  expect_true(grepl(basename(temp_csv), result_header$MedNormReference))
+  expect_true(grepl("crossplate", result_header$MedNormReference))
 
   # Clean up
   unlink(temp_csv)
@@ -192,7 +203,8 @@ test_that("`medianNormalize` Method 5: External reference as data.frame", {
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
-  expect_equal(result_header$MedNormReference, "external_data")
+  expect_true(grepl("external_data", result_header$MedNormReference))
+  expect_true(grepl("crossplate", result_header$MedNormReference))
 })
 
 test_that("`medianNormalize` validates input requirements", {

From 8b5be65e6134dbdd5d5f4e11d34cfde26f566dfa Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Tue, 13 Jan 2026 18:14:09 -0500
Subject: [PATCH 03/10] Streamline unit tests for medianNormalize()

- use subset of the exmaple data for unit tests
  to reduce execution time
---
 tests/testthat/_snaps/medianNormalize.md |  20 +--
 tests/testthat/test-medianNormalize.R    | 148 +++++++++++------------
 2 files changed, 83 insertions(+), 85 deletions(-)

diff --git a/tests/testthat/_snaps/medianNormalize.md b/tests/testthat/_snaps/medianNormalize.md
index 8a528fe1..390f7f7c 100644
--- a/tests/testthat/_snaps/medianNormalize.md
+++ b/tests/testthat/_snaps/medianNormalize.md
@@ -13,18 +13,18 @@
       > Normalization scale factors already exist: "NormScale_20, NormScale_0_005, NormScale_0_5" - they will be replaced with new scale factors
       > Building internal reference from field: "SampleType" with values: "QC" and "Sample"
       > Performing grouped median normalization by: "SampleType" (2 groups)
-      > Processing group: "Sample" (12 samples)
-      v Processing dilution '0_005' with 173 analytes
-      v Processing dilution '0_5' with 828 analytes
-      v Processing dilution '20' with 4271 analytes
-      > Processing group: "QC" (4 samples)
-      v Processing dilution '0_005' with 173 analytes
-      v Processing dilution '0_5' with 828 analytes
-      v Processing dilution '20' with 4271 analytes
+      > Processing group: "Sample" (2 samples)
+      v Processing dilution '0_005' with 50 analytes
+      v Processing dilution '0_5' with 50 analytes
+      v Processing dilution '20' with 50 analytes
+      > Processing group: "QC" (1 samples)
+      v Processing dilution '0_005' with 50 analytes
+      v Processing dilution '0_5' with 50 analytes
+      v Processing dilution '20' with 50 analytes
     Output
       Recalculating RowCheck values based on normalization acceptance criteria...
-      RowCheck values updated for 16 samples.
-        - PASS: 16 samples
+      RowCheck values updated for 3 samples.
+        - PASS: 3 samples
         - FLAG: 0 samples
         - Acceptance criteria: scale factors within [ 0.4 ,  2.5 ]
 
diff --git a/tests/testthat/test-medianNormalize.R b/tests/testthat/test-medianNormalize.R
index fb7d4b7d..afc8ebcf 100644
--- a/tests/testthat/test-medianNormalize.R
+++ b/tests/testthat/test-medianNormalize.R
@@ -1,14 +1,33 @@
 # Setup ----
-# Create a minimal test dataset
-create_test_data <- function() {
+# Create a minimal test dataset with options for small vs full size
+create_test_data <- function(small = FALSE) {
   data("example_data", package = "SomaDataIO")
-  # Select samples ensuring we have both Sample and QC types
-  sample_indices <- which(example_data$SampleType == "Sample")[1:12]
-  qc_indices <- which(example_data$SampleType == "QC")[1:4]
 
-  # Combine indices and subset
-  subset_indices <- c(sample_indices, qc_indices)
-  test_adat <- example_data[subset_indices, ]
+  if (small) {
+    # Ultra-small dataset for most tests: 3 samples, subset of analytes
+    sample_indices <- which(example_data$SampleType == "Sample")[1:2]
+    qc_indices <- which(example_data$SampleType == "QC")[1:1]
+    subset_indices <- c(sample_indices, qc_indices)
+
+    # Select subset of analytes from different dilution groups for speed
+    analytes <- getAnalytes(example_data)
+    analyte_info <- getAnalyteInfo(example_data)
+
+    # Get first 50 analytes from each dilution group for faster processing
+    selected_analytes <- c()
+    for (dil in unique(analyte_info$Dilution)) {
+      dil_analytes <- analytes[analyte_info$Dilution == dil]
+      selected_analytes <- c(selected_analytes, head(dil_analytes, 50))
+    }
+
+    test_adat <- example_data[subset_indices, c(getMeta(example_data), selected_analytes)]
+  } else {
+    # Standard test dataset: 16 samples, all analytes (for comprehensive tests)
+    sample_indices <- which(example_data$SampleType == "Sample")[1:12]
+    qc_indices <- which(example_data$SampleType == "QC")[1:4]
+    subset_indices <- c(sample_indices, qc_indices)
+    test_adat <- example_data[subset_indices, ]
+  }
 
   # Modify header to simulate pre-ANML state (remove ANML steps)
   header_meta <- attr(test_adat, "Header.Meta")
@@ -22,7 +41,8 @@ create_test_data <- function() {
   test_adat
 }
 
-test_data <- create_test_data()
+test_data <- create_test_data(small = TRUE)
+test_data_full <- create_test_data(small = FALSE)
 
 # Testing ----
 test_that("`medianNormalize` Method 1: Internal reference (default)", {
@@ -39,13 +59,13 @@ test_that("`medianNormalize` Method 1: Internal reference (default)", {
   expect_true(all(norm_cols %in% names(result)))
 
   # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(1.067845, 1.057102, 1.091748), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(1.022533, 1.001174, 1.012215), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(1.103112, 1.054358, 1.108711), tolerance = 0.0001)
+  expect_equal(result$NormScale_20[1:3], c(0.971895, 1.029779, 1), tolerance = 0.000001)
+  expect_equal(result$NormScale_0_005[1:3], c(1.002922, 0.997176, 1), tolerance = 0.000001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.973205, 1.028323, 1), tolerance = 0.000001)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(508.8, 501.5, 453.7))
-  expect_equal(result$seq.10008.43[1:3], c(599.9, 572.8, 462.8))
+  expect_equal(result$seq.10000.28[1:3], c(463.1, 488.5, 501.5))
+  expect_equal(result$seq.10001.7[1:3], c(301.4, 302.2, 207.4))
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
@@ -71,13 +91,13 @@ test_that("`medianNormalize` Method 2: Reference from specific samples", {
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
   # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(1.0369358, 0.9602251, 0.9841162), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.8570162, 0.8485842, 1.0327016), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.7771749, 0.8520195, 0.9151915), tolerance = 0.0001)
+  expect_equal(result$NormScale_20[1:3], c(1.0369358, 0.9602251, 1), tolerance = 0.000001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.8570162, 0.8485842, 1), tolerance = 0.000001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.7771749, 0.8520195, 1), tolerance = 0.000001)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 415.6))
-  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 423.9))
+  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 501.5))
+  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 510.1))
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
@@ -86,7 +106,7 @@ test_that("`medianNormalize` Method 2: Reference from specific samples", {
 
 test_that("`medianNormalize` Method 3: Reference from another ADAT", {
   # Create a minimal reference ADAT from sample subset
-  ref_adat <- test_data[1:4, ]  # Use first 4 samples as reference
+  ref_adat <- test_data[1:2, ]  # Use first 2 samples as reference
 
   expect_no_error(
     result <- medianNormalize(test_data, reference = ref_adat, verbose = FALSE)
@@ -98,13 +118,13 @@ test_that("`medianNormalize` Method 3: Reference from another ADAT", {
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
   # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(0.9907154, 1.0109152, 0.9931271), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(1.005733, 1.008454, 0.986035), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(1.0090878, 0.9909594, 1.0002446), tolerance = 0.0001)
+  expect_equal(result$NormScale_20[1:3], c(0.9718953, 1.0297787, 1.0841026), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(1.0029224, 0.9971762, 1.0548762), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.9732045, 1.0283228, 0.9601410), tolerance = 0.0001)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(472.1, 479.6, 412.7))
-  expect_equal(result$seq.10008.43[1:3], c(556.6, 547.8, 421.0))
+  expect_equal(result$seq.10000.28[1:3], c(463.1, 488.5, 543.7))
+  expect_equal(result$seq.10008.43[1:3], c(546, 558, 553))
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
@@ -135,13 +155,13 @@ test_that("`medianNormalize` Method 4: External reference file", {
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
   # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(2.937272, 2.982111, 2.936927), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.6313838, 0.5966632, 0.6624721), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.7147623, 0.7780614, 0.7704032), tolerance = 0.0001)
+  expect_equal(result$NormScale_20[1:3], c(3.31, 3.71, 4.21), tolerance = 0.01)
+  expect_equal(result$NormScale_0_005[1:3], c(0.577, 0.561, 0.561), tolerance = 0.001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.90, 1.18, 1.03), tolerance = 0.01)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(1399.6, 1414.7, 1220.6))
-  expect_equal(result$seq.10008.43[1:3], c(1650.2, 1616.0, 1245.0))
+  expect_equal(result$seq.10000.28[1:3], c(1579.0, 1759.9, 2109.7))
+  expect_equal(result$seq.10008.43[1:3], c(1861.7, 2010.3, 2145.9))
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
@@ -150,28 +170,6 @@ test_that("`medianNormalize` Method 4: External reference file", {
 
   # Clean up
   unlink(temp_csv)
-
-  # Test with TSV file
-  temp_tsv <- tempfile(fileext = ".tsv")
-  write.table(ref_data, temp_tsv, sep = "\t", row.names = FALSE)
-
-  expect_no_error(
-    result2 <- medianNormalize(test_data, reference = temp_tsv, verbose = FALSE)
-  )
-
-  expect_true(is.soma_adat(result2))
-
-  # Test scale factor output
-  expect_equal(result2$NormScale_20[1:3], c(2.937272, 2.982111, 2.936927), tolerance = 0.0001)
-  expect_equal(result2$NormScale_0_005[1:3], c(0.6313838, 0.5966632, 0.6624721), tolerance = 0.0001)
-  expect_equal(result2$NormScale_0_5[1:3], c(0.7147623, 0.7780614, 0.7704032), tolerance = 0.0001)
-
-  # Test specific SeqId columns
-  expect_equal(result2$seq.10000.28[1:3], c(1399.6, 1414.7, 1220.6))
-  expect_equal(result2$seq.10008.43[1:3], c(1650.2, 1616.0, 1245.0))
-
-  # Clean up
-  unlink(temp_tsv)
 })
 
 test_that("`medianNormalize` Method 5: External reference as data.frame", {
@@ -193,13 +191,13 @@ test_that("`medianNormalize` Method 5: External reference as data.frame", {
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
   # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(2.937272, 2.982111, 2.936927), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.6313838, 0.5966632, 0.6624721), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.7147623, 0.7780614, 0.7704032), tolerance = 0.0001)
+  expect_equal(result$NormScale_20[1:3], c(3.31, 3.71, 4.21), tolerance = 0.01)
+  expect_equal(result$NormScale_0_005[1:3], c(0.577, 0.561, 0.561), tolerance = 0.001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.90, 1.18, 1.03), tolerance = 0.01)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(1399.6, 1414.7, 1220.6))
-  expect_equal(result$seq.10008.43[1:3], c(1650.2, 1616.0, 1245.0))
+  expect_equal(result$seq.10000.28[1:3], c(1579.0, 1759.9, 2109.7))
+  expect_equal(result$seq.10008.43[1:3], c(1861.7, 2010.3, 2145.9))
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
@@ -287,19 +285,19 @@ test_that("`medianNormalize` handles grouping correctly", {
   norm_cols <- grep("^NormScale_", names(result1), value = TRUE)
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
-  # Test scale factor output
-  expect_equal(result1$NormScale_20[1:3], c(1.066964, 1.060173, 1.020341), tolerance = 0.0001)
-  expect_equal(result1$NormScale_0_005[1:3], c(1.000455, 1.005372, 1.000000), tolerance = 0.0001)
-  expect_equal(result1$NormScale_0_5[1:3], c(1.115562, 1.050018, 1.030397), tolerance = 0.0001)
+  # Test scale factor output - will vary based on grouping by sex
+  expect_equal(result1$NormScale_20[1:3], c(0.971895, 1.029779, 1.199944), tolerance = 0.000001)
+  expect_equal(result1$NormScale_0_005[1:3], c(1.002922, 0.997176, 1.058289), tolerance = 0.000001)
+  expect_equal(result1$NormScale_0_5[1:3], c(0.973205, 1.028323, 1.104745), tolerance = 0.000001)
 
   # Test specific SeqId columns
-  expect_equal(result1$seq.10000.28[1:3], c(508.4, 502.9, 424.1))
-  expect_equal(result1$seq.10008.43[1:3], c(599.4, 574.5, 432.5))
+  expect_equal(result1$seq.10000.28[1:3], c(463.1, 488.5, 501.5))
+  expect_equal(result1$seq.10008.43[1:3], c(546.0, 558.0, 510.1))
 
-  # Test grouping by multiple variables
-  if ("PlateId" %in% names(test_data)) {
+  # Test grouping by multiple variables - use full example data
+  if ("PlateId" %in% names(test_data_full)) {
     expect_no_error(
-      result2 <- medianNormalize(test_data, by = c("PlateId", "SampleType"), verbose = FALSE)
+      result2 <- medianNormalize(test_data_full, by = c("PlateId", "SampleType"), verbose = FALSE)
     )
 
     # Check result structure
@@ -308,9 +306,9 @@ test_that("`medianNormalize` handles grouping correctly", {
     expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
     # Test scale factor output
-    expect_equal(result2$NormScale_20[1:3], c(1.053656, 1.059554, 1.068602), tolerance = 0.0001)
-    expect_equal(result2$NormScale_0_005[1:3], c(1.0124749, 1.0053543, 0.9992799), tolerance = 0.0001)
-    expect_equal(result2$NormScale_0_5[1:3], c(1.080347, 1.056874, 1.074292), tolerance = 0.0001)
+    expect_equal(result2$NormScale_20[1:3], c(1.054, 1.060, 1.069), tolerance = 0.001)
+    expect_equal(result2$NormScale_0_005[1:3], c(1.0125, 1.0054, 0.9993), tolerance = 0.001)
+    expect_equal(result2$NormScale_0_5[1:3], c(1.080, 1.057, 1.074), tolerance = 0.001)
 
     # Test specific SeqId columns
     expect_equal(result2$seq.10000.28[1:3], c(502.1, 502.7, 444.1))
@@ -339,14 +337,14 @@ test_that("`medianNormalize` handles sample selection correctly", {
   norm_cols <- grep("^NormScale_", names(result), value = TRUE)
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
-  # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(1.0369358, 0.9602251, 0.9841162), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.8570162, 0.8485842, 1.0327016), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.7771749, 0.8520195, 0.9151915), tolerance = 0.0001)
+  # Test scale factor output - same as Method 2 for QC selection
+  expect_equal(result$NormScale_20[1:3], c(1.036936, 0.960225, 1), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.857016, 0.848584, 1), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.777175, 0.85202, 1), tolerance = 0.0001)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 415.6))
-  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 423.9))
+  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 501.5))
+  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 510.1))
 
   # Test with non-existent field
   expect_error(

From 071f956f301f7a6b0b574f6784482ca13e8700e0 Mon Sep 17 00:00:00 2001
From: Alex Poole <alex.poole@standardbio.com>
Date: Tue, 27 Jan 2026 17:10:22 -0700
Subject: [PATCH 04/10] Update medianNormalize function documentation

---
 R/medianNormalize.R | 133 +++++++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 62 deletions(-)

diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index 4b44d55e..410d2c10 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -1,101 +1,110 @@
 #' Perform Median Normalization
 #'
 #' @description Performs median normalization on a `soma_adat` object that has
-#' already undergone hybridization normalization and contains plate scale
-#' normalization factors.
+#' already undergone standard data processing for array-based SomaScan studies.
 #'
-#' Median normalization is a technique used to adjust data sets to remove
-#' certain assay artifacts and biases prior to analysis, particularly those that
-#' may arise due to differences in overall protein concentration, pipetting
-#' errors, reagent concentration changes, assay timing, and other systematic
-#' variabilities. Median normalization can improve assay precision and reduce
-#' technical variations that can mask true biological signals.
+#' Median normalization is a common, scale-based normalization technique that
+#' corrects for assay-derived technical variation by applying sample-specific
+#' linear scaling to expression measurements. Typical sources of assay
+#' variation include robotic and manual liquid handling, manufactured
+#' consumables such as buffers and plastic goods, laboratory instrument
+#' calibration, ambient environmental conditions, inter-operator differences,
+#' and other sources of technical variation. Median normalization
+#' can improve assay precision and reduce technical variation that can mask
+#' true biological signal.
 #'
-#' This function works by calculating or using reference median values for each
-#' dilution group present in the ADAT. Dilution groups are determined from the
-#' analyte metadata and represent different sample dilutions used in the assay
-#' (e.g., 1:1, 1:200, 1:2000). For each sample, the median RFU across all
-#' analytes within a dilution group is calculated and compared to the reference
-#' median for that group. Scale factors are then computed and applied to adjust
-#' each sample's values toward the reference, ensuring consistent signal levels
-#' across samples within each dilution group. This function supports multiple
-#' reference approaches: calculating an internal reference from study samples,
-#' or using a calculated reference from a different ADAT or external reference
-#' file.
+#' The method scales each sample so that the center of the within-sample analyte
+#' distribution aligns to a defined reference, thereby correcting
+#' global intensity shifts without altering relative differences between
+#' measurements within a sample. For assay formats with multiple dilution groups
+#' (e.g., 1:5 or 20%;  1:200 or 0.5%, 1:20,000 or 0.005%), separate scale
+#' factors are calculated for each dilution because each dilution group is
+#' processed separately during the assay. For each sample, the ratio of
+#' reference RFU / observed RFU is calculated for every SeqId. The median ratio
+#' within each dilution group is selected as the scale factor and applied to
+#' all SeqIds for that sample within the associated dilution bin.
 #'
 #' @section Data Requirements:
-#' This function is designed for data in standard SomaLogic deliverable formats:
+#' This function is designed for data in standard SomaLogic deliverable formats.
+#' Specific ADAT file requirements:
 #' \enumerate{
-#'   \item \strong{Study samples not median normalized}, with all controls median normalized.
-#'         Calibrators and buffers are intra-plate median normalized and QC samples are ANML
-#'         normalized if present on standard 3-dilution setups. QC assay controls undergo
-#'         full ANML data standardization even when study samples don't, in order to QC
-#'         the plate. Cell & tissue single dilution runs don't have QC samples.
-#'   \item \strong{Same as above}, but study samples undergo either ANML or intra-study
-#'         standard median normalization.
+#'   \item \strong{Intact ADAT file}, with available data processing information
+#'     in the header section. Specifically, the `ProcessSteps` field must be
+#'     present and correctly represent the data processing steps present in
+#'     the data table.
+#'   \item \strong{Minimal standard processing}, the function assumes a standard
+#'     SomaScan data deliverable with minimally standard HybNorm and PlateScale
+#'     steps applied.
 #' }
 #'
-#' All data should be hybridization normalized, median normalized internally
-#' (buffer + calibrator), and plate scaled regardless of matrix prior to applying
-#' median normalization. Calibration is not applied to all study types.
-#'
 #' \strong{Primary use cases:}
 #' \itemize{
-#'   \item 3-dilution setups (standard SomaScan)
-#'   \item Single dilution cell & tissue studies
+#'   \item Combining data sets from the same overarching experiment or sample
+#'     population and normalize to a common reference that were originally
+#'     processed separately and each normalized "withing study".
+#'   \item Normalize fundamentally different types of samples separately (by
+#'     group). For instance, lysate samples from different cell lines that
+#'     will be analyzed separately should likely be median normalized within
+#'     each cell type. Lysis buffer background samples would also be expected
+#'     to be normalized separately.
 #' }
 #'
 #' @section Important Considerations:
 #' \itemize{
-#'   \item If there is a known change in total protein concentration due to analysis
-#'     conditions, perform normalization _within_ groups using the `by` parameter
-#'   \item In newer SomaScan assay versions, population references improve
-#'     inter-plate consistency compared to within-plate references
-#'   \item The function will not proceed on already ANML or median normalized data
-#'     unless `reverse_existing = TRUE` is specified
-#'   \item When reversing existing normalization, only study samples are reversed;
-#'     QC, Calibrator, and Buffer samples retain their normalization
+#'   \item A core assumption of median normalization is that the majority of
+#'     analytes are not differentially expressed; consequently, users should
+#'     validate this assumption by inspecting scale-factor distributions for
+#'     systematic bias between the biological groups intended for comparison.
+#'   \item Note this function does not perform the adaptive normalization by
+#'     maximum likelihood (ANML) method which leverages a population-based
+#'     reference that iteratively down-selects the set of analytes to include
+#'     for the normalization calculation.
+#'   \item The function requires `reverse_existing = TRUE` to be set in order
+#'     to process data where study samples have already undergone ANML or
+#'     standard median normalization.
+#'   \item When reversing existing normalization, only study samples are
+#'     reversed; QC, Calibrator, and Buffer samples retain their normalization
 #' }
 #'
 #' @param adat A `soma_adat` object created using [read_adat()], containing
 #'   RFU values that have been hybridization normalized and plate scaled.
 #' @param reference Optional. Reference for median normalization. Can be:
 #'   \itemize{
-#'     \item `NULL` (default): Use internal reference from study samples
+#'     \item `NULL` (default): Calculate an internal reference from study
+#'       samples by taking the median of each SeqId within the sample
+#'       grouping. For multi-plate studies, the median of all plate medians
+#'       is used.
 #'     \item A `soma_adat` object: Extract reference from this ADAT
-#'     \item A file path (character): Read external reference from tab/comma-separated file
+#'     \item A file path (character): Read external reference from
+#'       tab/comma-separated file
 #'     \item A data.frame: Use provided reference data directly
 #'   }
 #'   When providing an external reference file or data.frame it must contain:
 #'   \describe{
-#'     \item{Dilution}{Character column specifying the dilution group names
-#'       (e.g., "0_005", "0_5", "20"). These should match the dilution
-#'       groups present in the ADAT analyte data.}
-#'     \item{Reference}{Numeric column containing the reference median values
-#'       for each dilution group. These values will be used as the target
-#'       medians for normalization.}
-#'     \item{SeqId}{Optional character column. When included, SeqId-specific
-#'       reference values are used for more precise normalization. Values
-#'       must be in "10000-28" format, not "seq.10000.28" format.}
+#'     \item{SeqId}{Character column containing the SeqId identifiers mapping
+#'       to those in the `soma_adat` object. Must be in "10000-28" format, not
+#'       "seq.10000.28" format.}
+#'     \item{Reference}{Numeric column containing the reference RFU values
+#'       for each SeqId.}
+#'
 #'   }
 #' @param ref_field Character. Field used to select reference samples when
 #'   `reference = NULL`. Default is `"SampleType"`.
 #' @param ref_value Character. Value(s) in `ref_field` to use as reference
-#'   when `reference = NULL`. Default is `c("QC", "Sample")`.
-#' @param by Character vector. Grouping variable(s) for grouped median normalization.
-#'   Must be column name(s) in the ADAT. Use grouping when there are known changes
-#'   in total protein load due to analysis conditions (e.g., disease vs. control,
-#'   treatment vs. baseline). Normalization will be performed within each group
-#'   separately. Default is `"SampleType"`.
+#'   when `reference = NULL`. Default is `c("Sample")`.
+#' @param by Character vector. Grouping variable(s) for grouped median
+#'   normalization. Must be column name(s) in the ADAT. Normalization will be
+#'   performed within each group separately. Default is `"SampleType"`.
 #' @param do_field Character. The field used to select samples for normalization
 #'   (others keep original values). Default is `"SampleType"`.
 #' @param do_regexp Character. A regular expression pattern to select samples
-#'   from `do_field` for normalization. Default is `"QC|Sample"`.
+#'   from `do_field` for normalization. Default is `"Sample"`.
 #' @param reverse_existing Logical. Should existing median normalization be
 #'   reversed before applying new normalization? When `TRUE`, existing median
 #'   normalization scale factors are reversed for study samples only (QC,
 #'   Calibrator, and Buffer samples retain their normalization). This allows
-#'   re-normalization of data that has already been median normalized. Default is `FALSE`.
+#'   re-normalization of data that has already been median normalized. Default
+#'   is `FALSE`.
 #' @param verbose Logical. Should progress messages be printed? Default is `TRUE`.
 #' @return A `soma_adat` object with median normalization applied and RFU values
 #'   adjusted. The existing `NormScale_*` columns are updated to include the
@@ -153,10 +162,10 @@
 medianNormalize <- function(adat,
                             reference = NULL,
                             ref_field = "SampleType",
-                            ref_value = c("QC", "Sample"),
+                            ref_value = c("Sample"),
                             by = "SampleType",
                             do_field = "SampleType",
-                            do_regexp = "QC|Sample",
+                            do_regexp = "Sample",
                             reverse_existing = FALSE,
                             verbose = TRUE) {
 

From 8e47546eff534912636dce535e5f1e817b496baa Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Mon, 9 Feb 2026 12:00:18 -0500
Subject: [PATCH 05/10] Simplify API, use sample-only defaults, add reverse
 ANML support

- adjusted defaults to sample-only processing (SampleType == Sample)
- added ANML normalization reversal support
- simplified external reference to data.frame-only input
- now require only SeqId and Reference columns for external reference data
- added medNormSMP_ReferenceRFU annotation field with 2 decimal precision
- updated ProcessSteps formatting to use "MedNormSMP" with reversal tracking
---
 NAMESPACE                                |   1 -
 R/medianNormalize.R                      | 256 ++++++++++++++---------
 inst/WORDLIST                            |   1 +
 man/medianNormalize.Rd                   | 162 +++++++-------
 tests/testthat/_snaps/medianNormalize.md |   8 +-
 tests/testthat/test-medianNormalize.R    | 224 ++++++++++++--------
 6 files changed, 367 insertions(+), 285 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index ffe655c4..a78cf5cf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -219,6 +219,5 @@ importFrom(utils,capture.output)
 importFrom(utils,head)
 importFrom(utils,read.csv)
 importFrom(utils,read.delim)
-importFrom(utils,read.table)
 importFrom(utils,tail)
 importFrom(utils,write.table)
diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index 410d2c10..a74ba896 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -75,18 +75,15 @@
 #'       grouping. For multi-plate studies, the median of all plate medians
 #'       is used.
 #'     \item A `soma_adat` object: Extract reference from this ADAT
-#'     \item A file path (character): Read external reference from
-#'       tab/comma-separated file
 #'     \item A data.frame: Use provided reference data directly
 #'   }
-#'   When providing an external reference file or data.frame it must contain:
+#'   When providing an external reference data.frame it must contain:
 #'   \describe{
 #'     \item{SeqId}{Character column containing the SeqId identifiers mapping
 #'       to those in the `soma_adat` object. Must be in "10000-28" format, not
 #'       "seq.10000.28" format.}
 #'     \item{Reference}{Numeric column containing the reference RFU values
 #'       for each SeqId.}
-#'
 #'   }
 #' @param ref_field Character. Field used to select reference samples when
 #'   `reference = NULL`. Default is `"SampleType"`.
@@ -99,12 +96,12 @@
 #'   (others keep original values). Default is `"SampleType"`.
 #' @param do_regexp Character. A regular expression pattern to select samples
 #'   from `do_field` for normalization. Default is `"Sample"`.
-#' @param reverse_existing Logical. Should existing median normalization be
+#' @param reverse_existing Logical. Should existing median or ANML normalization be
 #'   reversed before applying new normalization? When `TRUE`, existing median
-#'   normalization scale factors are reversed for study samples only (QC,
-#'   Calibrator, and Buffer samples retain their normalization). This allows
-#'   re-normalization of data that has already been median normalized. Default
-#'   is `FALSE`.
+#'   normalization scale factors or ANML normalization effects are reversed for
+#'   study samples only (QC, Calibrator, and Buffer samples retain their
+#'   normalization). This allows re-normalization of data that has already been
+#'   median normalized or ANML normalized. Default is `FALSE`.
 #' @param verbose Logical. Should progress messages be printed? Default is `TRUE`.
 #' @return A `soma_adat` object with median normalization applied and RFU values
 #'   adjusted. The existing `NormScale_*` columns are updated to include the
@@ -112,30 +109,19 @@
 #' @examples
 #' \dontrun{
 #' # Internal reference from study samples (default)
-#' # Use when you have representative QC or control samples in your study
 #' med_norm_adat <- medianNormalize(adat)
 #'
 #' # Reference from specific samples in the ADAT
-#' # Use when you want to normalize against only QC samples
 #' med_norm_adat <- medianNormalize(adat,
 #'                                  ref_field = "SampleType",
-#'                                  ref_value = "QC")
+#'                                  ref_value = "Sample")
 #'
 #' # Reference from another ADAT
-#' # Use when you have a reference population or control study
 #' ref_adat <- read_adat("reference_file.adat")
 #' med_norm_adat <- medianNormalize(adat, reference = ref_adat)
 #'
-#' # External reference file
-#' # Use when you have pre-calculated reference medians for each dilution
-#' med_norm_adat <- medianNormalize(adat, reference = "reference_file.csv")
-#'
-#' # External reference as data.frame
-#' # Use for programmatic control over reference values
-#' ref_data <- data.frame(
-#'   Dilution = c("0_005", "0_5", "20"),
-#'   Reference = c(1500.2, 3200.8, 4100.5)
-#' )
+#' # External reference as a data.frame - requires `SeqId` and `Reference` columns
+#' ref_data <- read.csv("reference_file.csv")
 #' med_norm_adat <- medianNormalize(adat, reference = ref_data)
 #'
 #' # Custom grouping by multiple variables
@@ -144,12 +130,11 @@
 #' med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
 #'
 #' # Normalize only specific sample types
-#' # Use when you want to preserve original values for certain sample types
 #' med_norm_adat <- medianNormalize(adat,
 #'                                  do_field = "SampleType",
 #'                                  do_regexp = "Sample")
 #'
-#' # Re-normalize data that has already been median normalized
+#' # Re-normalize data that has already been median or ANML normalized
 #' # Use when you want to apply different normalization to previously normalized data
 #' med_norm_adat <- medianNormalize(adat,
 #'                                  reverse_existing = TRUE,
@@ -157,7 +142,6 @@
 #' }
 #' @importFrom dplyr filter
 #' @importFrom stats median
-#' @importFrom utils read.table read.csv
 #' @export
 medianNormalize <- function(adat,
                             reference = NULL,
@@ -172,6 +156,14 @@ medianNormalize <- function(adat,
   # Input validation ----
   stopifnot("`adat` must be a class `soma_adat` object" = is.soma_adat(adat))
 
+  # Validate reference type early
+  if (!is.null(reference) && !is.soma_adat(reference) && !is.data.frame(reference)) {
+    stop(
+      "Invalid reference type. Must be NULL, soma_adat, or data.frame",
+      call. = FALSE
+    )
+  }
+
   # Check that required normalization steps have been applied ----
   header <- attr(adat, "Header.Meta")$HEADER
 
@@ -205,9 +197,20 @@ medianNormalize <- function(adat,
   # Reverse existing normalization if requested ----
   if (reverse_existing && has_existing_norm) {
     if (verbose) {
-      cat("Reversing existing median normalization for study samples...\n")
+      if (grepl("ANML", header$ProcessSteps %||% "", ignore.case = TRUE)) {
+        cat("Reversing existing ANML normalization for study samples...\n")
+        adat <- .reverseANMLSMP(adat, verbose)
+      } else {
+        cat("Reversing existing median normalization for study samples...\n")
+        adat <- .reverseMedNormSMP(adat, verbose)
+      }
+    } else {
+      if (grepl("ANML", header$ProcessSteps %||% "", ignore.case = TRUE)) {
+        adat <- .reverseANMLSMP(adat, verbose)
+      } else {
+        adat <- .reverseMedNormSMP(adat, verbose)
+      }
     }
-    adat <- .reverseMedNormSMP(adat, verbose)
   }
 
   # Create dilution groups ----
@@ -353,13 +356,6 @@ medianNormalize <- function(adat,
     }
     return(.extractReferenceFromAdat(reference, dil_groups))
 
-  } else if (is.character(reference) && length(reference) == 1) {
-    # External reference file
-    if (verbose) {
-      .todo("Reading external reference from file: {.val {reference}}")
-    }
-    return(.readExternalReference(reference, dil_groups, apt_data))
-
   } else if (is.data.frame(reference)) {
     # Reference data provided directly
     if (verbose) {
@@ -369,7 +365,7 @@ medianNormalize <- function(adat,
 
   } else {
     stop(
-      "Invalid reference type. Must be NULL, soma_adat, file path, or data.frame",
+      "Invalid reference type. Must be NULL, soma_adat, or data.frame",
       call. = FALSE
     )
   }
@@ -423,62 +419,31 @@ medianNormalize <- function(adat,
   ref_data
 }
 
-#' Read External Reference File
-#' @noRd
-.readExternalReference <- function(file_path, dil_groups, apt_data) {
-
-  if (!file.exists(file_path)) {
-    stop("Reference file not found: ", file_path, call. = FALSE)
-  }
 
-  # Determine file type and read
-  file_ext_val <- file_ext(file_path)
-
-  if (file_ext_val %in% c("csv")) {
-    ref_df <- read.csv(file_path, stringsAsFactors = FALSE)
-  } else if (file_ext_val %in% c("txt", "tsv")) {
-    ref_df <- utils::read.table(file_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
-  } else {
-    # Try to auto-detect
-    ref_df <- tryCatch({
-      read.csv(file_path, stringsAsFactors = FALSE)
-    }, error = function(e) {
-      utils::read.table(file_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
-    })
-  }
-
-  .validateReferenceData(ref_df, dil_groups, apt_data)
-}
 
 #' Validate Reference Data
 #' @noRd
 .validateReferenceData <- function(ref_df, dil_groups, apt_data = NULL) {
 
-  # Check that required columns are present
-  required_cols <- c("Dilution", "Reference")
+  # Check for required SeqId and Reference columns
+  required_cols <- c("SeqId", "Reference")
   if (!all(required_cols %in% names(ref_df))) {
     missing_cols <- setdiff(required_cols, names(ref_df))
     stop(
-      "Reference data must contain columns: ", paste(missing_cols, collapse = ", "),
+      "Reference data must contain 'SeqId' and 'Reference' columns.\n",
+      "Missing columns: ", paste(missing_cols, collapse = ", "), "\n",
+      "Found columns: ", paste(names(ref_df), collapse = ", "),
       call. = FALSE
     )
   }
 
-  # Check if we have SeqId-specific information
-  if ("SeqId" %in% names(ref_df)) {
-    # Process as SeqId-specific reference
-    return(.processSeqIdReference(ref_df, dil_groups, apt_data))
-  } else {
-    # Process as simple dilution-level reference
-    return(.processSimpleReference(ref_df, dil_groups))
-  }
+  # Process as SeqId-specific reference
+  return(.processSeqIdReference(ref_df, dil_groups, apt_data))
 }
 
 #' Process SeqId-Specific Reference Data
 #' @noRd
 .processSeqIdReference <- function(ref_df, dil_groups, apt_data) {
-  # Convert dilution values to character for consistent matching
-  ref_df$Dilution <- as.character(ref_df$Dilution)
 
   if (!is.null(apt_data) && !"SeqId" %in% names(apt_data)) {
     stop("ADAT analyte data must contain SeqId column for SeqId reference matching", call. = FALSE)
@@ -518,23 +483,7 @@ medianNormalize <- function(adat,
   ref_data
 }
 
-#' Process Simple Reference Data (Dilution -> Reference)
-#' @noRd
-.processSimpleReference <- function(ref_df, dil_groups) {
-  # Convert to named list
-  ref_data <- setNames(ref_df$Reference, ref_df$Dilution)
-
-  # Check that we have references for all dilution groups
-  missing_dils <- setdiff(names(dil_groups), names(ref_data))
-  if (length(missing_dils) > 0) {
-    stop(
-      "Missing reference values for dilution groups: ", paste(missing_dils, collapse = ", "),
-      call. = FALSE
-    )
-  }
 
-  ref_data
-}
 
 #' Perform Median Normalization
 #' @noRd
@@ -698,15 +647,15 @@ medianNormalize <- function(adat,
   if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
     # Add median normalization to process steps
     if ("ProcessSteps" %in% names(header_meta$HEADER)) {
-      if (!grepl("medNormInt", header_meta$HEADER$ProcessSteps)) {
+      if (!grepl("MedNormSMP", header_meta$HEADER$ProcessSteps)) {
         header_meta$HEADER$ProcessSteps <- paste(
           header_meta$HEADER$ProcessSteps,
-          "medNormInt (SampleId)",
+          "MedNormSMP",
           sep = ", "
         )
       }
     } else {
-      header_meta$HEADER$ProcessSteps <- "medNormInt (SampleId)"
+      header_meta$HEADER$ProcessSteps <- "MedNormSMP"
     }
 
     # Set normalization algorithm
@@ -756,10 +705,10 @@ medianNormalize <- function(adat,
     )
   }
 
-  if (has_anml) {
+  if (has_anml && !reverse_existing) {
     stop(
       "Data appears to be ANML normalized. ",
-      "ANML normalized data cannot be directly processed with this function. ",
+      "Set reverse_existing = TRUE to reverse existing ANML normalization before applying new normalization. ",
       "ProcessSteps: ", process_steps,
       call. = FALSE
     )
@@ -845,13 +794,15 @@ medianNormalize <- function(adat,
 
           # Handle both single reference and aptamer-specific references
           if (is.numeric(ref_values) && length(ref_values) == 1) {
-            # Single reference value for the whole dilution group
-            apt_data$medNormSMP_ReferenceRFU[apt_data$AptName %in% dil_apts] <- ref_values
+            # Single reference value for the whole dilution group (round to 2 decimal places)
+            rounded_value <- round(ref_values, 2)
+            apt_data$medNormSMP_ReferenceRFU[apt_data$AptName %in% dil_apts] <- rounded_value
           } else if (is.numeric(ref_values) && length(ref_values) > 1) {
-            # Aptamer-specific reference values
+            # Aptamer-specific reference values (round to 2 decimal places)
             for (apt in dil_apts) {
               if (apt %in% names(ref_values)) {
-                apt_data$medNormSMP_ReferenceRFU[apt_data$AptName == apt] <- ref_values[apt]
+                rounded_value <- round(ref_values[apt], 2)
+                apt_data$medNormSMP_ReferenceRFU[apt_data$AptName == apt] <- rounded_value
               }
             }
           }
@@ -926,7 +877,16 @@ medianNormalize <- function(adat,
   header_meta <- attr(adat, "Header.Meta")
   if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
     current_steps <- header_meta$HEADER$ProcessSteps %||% ""
-    header_meta$HEADER$ProcessSteps <- paste(current_steps, "reverseMedNormSMP", sep = ", ")
+
+    # Add reversal step - look for what type of median norm was reversed
+    if (grepl("MedNormSMP", current_steps, ignore.case = TRUE)) {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-MedNormSMP", sep = ", ")
+    } else if (grepl("medNormInt", current_steps, ignore.case = TRUE)) {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-medNormInt", sep = ", ")
+    } else {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-MedNorm", sep = ", ")
+    }
+
     attr(adat, "Header.Meta") <- header_meta
   }
 
@@ -938,6 +898,102 @@ medianNormalize <- function(adat,
 }
 
 
+#' Reverse Existing ANML Normalization for Study Samples
+#' @noRd
+.reverseANMLSMP <- function(adat, verbose) {
+
+  # Get existing scale factors
+  sf_cols <- grep("^NormScale_", names(adat), value = TRUE)
+
+  if (length(sf_cols) == 0) {
+    if (verbose) {
+      cat("No existing ANML normalization scale factors found to reverse.\n")
+    }
+    return(adat)
+  }
+
+  if (verbose) {
+    cat("Reversing existing ANML normalization for study samples...\n")
+  }
+
+  # Get dilution groups
+  apt_data <- getAnalyteInfo(adat)
+  dil_groups <- split(apt_data$AptName, apt_data$Dilution)
+  names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
+  names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
+
+  # Only reverse for study samples, leave QC/Calibrator/Buffer alone
+  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
+
+  if (sum(sample_mask) == 0) {
+    if (verbose) {
+      cat("No study samples found to reverse ANML normalization.\n")
+    }
+    return(adat)
+  }
+
+  # For each dilution group, reverse ANML normalization using log space
+  for (dil_name in names(dil_groups)) {
+    sf_col <- paste0("NormScale_", dil_name)
+
+    if (sf_col %in% names(adat)) {
+      dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
+
+      if (length(dil_apts) > 0) {
+        for (i in which(sample_mask)) {
+          scale_factor <- adat[[sf_col]][i]
+          if (!is.na(scale_factor) && scale_factor != 0) {
+            # ANML uses log space scaling - reverse by subtracting log scale factor
+            log_sf <- log10(scale_factor)
+            log_data <- log10(as.numeric(adat[i, dil_apts]))
+            reversed_log_data <- log_data - log_sf
+            adat[i, dil_apts] <- 10^reversed_log_data
+            # Reset scale factor to 1.0
+            adat[[sf_col]][i] <- 1.0
+          }
+        }
+      }
+    }
+  }
+
+  # Remove ANMLFractionUsed columns if they exist
+  anml_frac_cols <- grep("^ANMLFractionUsed_", names(adat), value = TRUE)
+  if (length(anml_frac_cols) > 0) {
+    for (col in anml_frac_cols) {
+      if (all(is.na(adat[[col]][sample_mask]))) {
+        adat[[col]][sample_mask] <- NA
+      }
+    }
+  }
+
+  # Update ProcessSteps to indicate reversal
+  header_meta <- attr(adat, "Header.Meta")
+  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
+    current_steps <- header_meta$HEADER$ProcessSteps %||% ""
+
+    # Add reversal step - look for what type of ANML was reversed
+    if (grepl("anmlSMP", current_steps, ignore.case = TRUE)) {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-anmlSMP", sep = ", ")
+    } else if (grepl("ANML", current_steps, ignore.case = TRUE)) {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-ANML", sep = ", ")
+    }
+
+    attr(adat, "Header.Meta") <- header_meta
+  }
+
+  # Reset RowCheck for reversed samples
+  if ("RowCheck" %in% names(adat)) {
+    adat$RowCheck[sample_mask] <- "PASS"
+  }
+
+  if (verbose) {
+    cat("ANML normalization reversed for", sum(sample_mask), "study samples.\n")
+  }
+
+  adat
+}
+
+
 #' Recalculate RowCheck after Median Normalization
 #'
 #' Recalculates RowCheck values as PASS or FLAG based on normalization acceptance
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 8bd5756d..606b0080 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -34,6 +34,7 @@ ExtIdentifier
 FC
 Hiser
 HybControlNormScale
+HybNorm
 Kuei
 LF
 Lifecycle
diff --git a/man/medianNormalize.Rd b/man/medianNormalize.Rd
index d8ac0a30..34ca2032 100644
--- a/man/medianNormalize.Rd
+++ b/man/medianNormalize.Rd
@@ -8,10 +8,10 @@ medianNormalize(
   adat,
   reference = NULL,
   ref_field = "SampleType",
-  ref_value = c("QC", "Sample"),
+  ref_value = c("Sample"),
   by = "SampleType",
   do_field = "SampleType",
-  do_regexp = "QC|Sample",
+  do_regexp = "Sample",
   reverse_existing = FALSE,
   verbose = TRUE
 )
@@ -22,47 +22,44 @@ RFU values that have been hybridization normalized and plate scaled.}
 
 \item{reference}{Optional. Reference for median normalization. Can be:
 \itemize{
-\item \code{NULL} (default): Use internal reference from study samples
+\item \code{NULL} (default): Calculate an internal reference from study
+samples by taking the median of each SeqId within the sample
+grouping. For multi-plate studies, the median of all plate medians
+is used.
 \item A \code{soma_adat} object: Extract reference from this ADAT
-\item A file path (character): Read external reference from tab/comma-separated file
 \item A data.frame: Use provided reference data directly
 }
-When providing an external reference file or data.frame it must contain:
+When providing an external reference data.frame it must contain:
 \describe{
-\item{Dilution}{Character column specifying the dilution group names
-(e.g., "0_005", "0_5", "20"). These should match the dilution
-groups present in the ADAT analyte data.}
-\item{Reference}{Numeric column containing the reference median values
-for each dilution group. These values will be used as the target
-medians for normalization.}
-\item{SeqId}{Optional character column. When included, SeqId-specific
-reference values are used for more precise normalization. Values
-must be in "10000-28" format, not "seq.10000.28" format.}
+\item{SeqId}{Character column containing the SeqId identifiers mapping
+to those in the \code{soma_adat} object. Must be in "10000-28" format, not
+"seq.10000.28" format.}
+\item{Reference}{Numeric column containing the reference RFU values
+for each SeqId.}
 }}
 
 \item{ref_field}{Character. Field used to select reference samples when
 \code{reference = NULL}. Default is \code{"SampleType"}.}
 
 \item{ref_value}{Character. Value(s) in \code{ref_field} to use as reference
-when \code{reference = NULL}. Default is \code{c("QC", "Sample")}.}
+when \code{reference = NULL}. Default is \code{c("Sample")}.}
 
-\item{by}{Character vector. Grouping variable(s) for grouped median normalization.
-Must be column name(s) in the ADAT. Use grouping when there are known changes
-in total protein load due to analysis conditions (e.g., disease vs. control,
-treatment vs. baseline). Normalization will be performed within each group
-separately. Default is \code{"SampleType"}.}
+\item{by}{Character vector. Grouping variable(s) for grouped median
+normalization. Must be column name(s) in the ADAT. Normalization will be
+performed within each group separately. Default is \code{"SampleType"}.}
 
 \item{do_field}{Character. The field used to select samples for normalization
 (others keep original values). Default is \code{"SampleType"}.}
 
 \item{do_regexp}{Character. A regular expression pattern to select samples
-from \code{do_field} for normalization. Default is \code{"QC|Sample"}.}
+from \code{do_field} for normalization. Default is \code{"Sample"}.}
 
-\item{reverse_existing}{Logical. Should existing median normalization be
+\item{reverse_existing}{Logical. Should existing median or ANML normalization be
 reversed before applying new normalization? When \code{TRUE}, existing median
-normalization scale factors are reversed for study samples only (QC,
-Calibrator, and Buffer samples retain their normalization). This allows
-re-normalization of data that has already been median normalized. Default is \code{FALSE}.}
+normalization scale factors or ANML normalization effects are reversed for
+study samples only (QC, Calibrator, and Buffer samples retain their
+normalization). This allows re-normalization of data that has already been
+median normalized or ANML normalized. Default is \code{FALSE}.}
 
 \item{verbose}{Logical. Should progress messages be printed? Default is \code{TRUE}.}
 }
@@ -73,93 +70,91 @@ effects of both plate scale normalization and median normalization.
 }
 \description{
 Performs median normalization on a \code{soma_adat} object that has
-already undergone hybridization normalization and contains plate scale
-normalization factors.
-
-Median normalization is a technique used to adjust data sets to remove
-certain assay artifacts and biases prior to analysis, particularly those that
-may arise due to differences in overall protein concentration, pipetting
-errors, reagent concentration changes, assay timing, and other systematic
-variabilities. Median normalization can improve assay precision and reduce
-technical variations that can mask true biological signals.
-
-This function works by calculating or using reference median values for each
-dilution group present in the ADAT. Dilution groups are determined from the
-analyte metadata and represent different sample dilutions used in the assay
-(e.g., 1:1, 1:200, 1:2000). For each sample, the median RFU across all
-analytes within a dilution group is calculated and compared to the reference
-median for that group. Scale factors are then computed and applied to adjust
-each sample's values toward the reference, ensuring consistent signal levels
-across samples within each dilution group. This function supports multiple
-reference approaches: calculating an internal reference from study samples,
-or using a calculated reference from a different ADAT or external reference
-file.
+already undergone standard data processing for array-based SomaScan studies.
+
+Median normalization is a common, scale-based normalization technique that
+corrects for assay-derived technical variation by applying sample-specific
+linear scaling to expression measurements. Typical sources of assay
+variation include robotic and manual liquid handling, manufactured
+consumables such as buffers and plastic goods, laboratory instrument
+calibration, ambient environmental conditions, inter-operator differences,
+and other sources of technical variation. Median normalization
+can improve assay precision and reduce technical variation that can mask
+true biological signal.
+
+The method scales each sample so that the center of the within-sample analyte
+distribution aligns to a defined reference, thereby correcting
+global intensity shifts without altering relative differences between
+measurements within a sample. For assay formats with multiple dilution groups
+(e.g., 1:5 or 20\%;  1:200 or 0.5\%, 1:20,000 or 0.005\%), separate scale
+factors are calculated for each dilution because each dilution group is
+processed separately during the assay. For each sample, the ratio of
+reference RFU / observed RFU is calculated for every SeqId. The median ratio
+within each dilution group is selected as the scale factor and applied to
+all SeqIds for that sample within the associated dilution bin.
 }
 \section{Data Requirements}{
 
-This function is designed for data in standard SomaLogic deliverable formats:
+This function is designed for data in standard SomaLogic deliverable formats.
+Specific ADAT file requirements:
 \enumerate{
-\item \strong{Study samples not median normalized}, with all controls median normalized.
-Calibrators and buffers are intra-plate median normalized and QC samples are ANML
-normalized if present on standard 3-dilution setups. QC assay controls undergo
-full ANML data standardization even when study samples don't, in order to QC
-the plate. Cell & tissue single dilution runs don't have QC samples.
-\item \strong{Same as above}, but study samples undergo either ANML or intra-study
-standard median normalization.
+\item \strong{Intact ADAT file}, with available data processing information
+in the header section. Specifically, the \code{ProcessSteps} field must be
+present and correctly represent the data processing steps present in
+the data table.
+\item \strong{Minimal standard processing}, the function assumes a standard
+SomaScan data deliverable with minimally standard HybNorm and PlateScale
+steps applied.
 }
 
-All data should be hybridization normalized, median normalized internally
-(buffer + calibrator), and plate scaled regardless of matrix prior to applying
-median normalization. Calibration is not applied to all study types.
-
 \strong{Primary use cases:}
 \itemize{
-\item 3-dilution setups (standard SomaScan)
-\item Single dilution cell & tissue studies
+\item Combining data sets from the same overarching experiment or sample
+population and normalize to a common reference that were originally
+processed separately and each normalized "withing study".
+\item Normalize fundamentally different types of samples separately (by
+group). For instance, lysate samples from different cell lines that
+will be analyzed separately should likely be median normalized within
+each cell type. Lysis buffer background samples would also be expected
+to be normalized separately.
 }
 }
 
 \section{Important Considerations}{
 
 \itemize{
-\item If there is a known change in total protein concentration due to analysis
-conditions, perform normalization \emph{within} groups using the \code{by} parameter
-\item In newer SomaScan assay versions, population references improve
-inter-plate consistency compared to within-plate references
-\item The function will not proceed on already ANML or median normalized data
-unless \code{reverse_existing = TRUE} is specified
-\item When reversing existing normalization, only study samples are reversed;
-QC, Calibrator, and Buffer samples retain their normalization
+\item A core assumption of median normalization is that the majority of
+analytes are not differentially expressed; consequently, users should
+validate this assumption by inspecting scale-factor distributions for
+systematic bias between the biological groups intended for comparison.
+\item Note this function does not perform the adaptive normalization by
+maximum likelihood (ANML) method which leverages a population-based
+reference that iteratively down-selects the set of analytes to include
+for the normalization calculation.
+\item The function requires \code{reverse_existing = TRUE} to be set in order
+to process data where study samples have already undergone ANML or
+standard median normalization.
+\item When reversing existing normalization, only study samples are
+reversed; QC, Calibrator, and Buffer samples retain their normalization
 }
 }
 
 \examples{
 \dontrun{
 # Internal reference from study samples (default)
-# Use when you have representative QC or control samples in your study
 med_norm_adat <- medianNormalize(adat)
 
 # Reference from specific samples in the ADAT
-# Use when you want to normalize against only QC samples
 med_norm_adat <- medianNormalize(adat,
                                  ref_field = "SampleType",
-                                 ref_value = "QC")
+                                 ref_value = "Sample")
 
 # Reference from another ADAT
-# Use when you have a reference population or control study
 ref_adat <- read_adat("reference_file.adat")
 med_norm_adat <- medianNormalize(adat, reference = ref_adat)
 
-# External reference file
-# Use when you have pre-calculated reference medians for each dilution
-med_norm_adat <- medianNormalize(adat, reference = "reference_file.csv")
-
-# External reference as data.frame
-# Use for programmatic control over reference values
-ref_data <- data.frame(
-  Dilution = c("0_005", "0_5", "20"),
-  Reference = c(1500.2, 3200.8, 4100.5)
-)
+# External reference as a data.frame - requires `SeqId` and `Reference` columns
+ref_data <- read.csv("reference_file.csv")
 med_norm_adat <- medianNormalize(adat, reference = ref_data)
 
 # Custom grouping by multiple variables
@@ -168,12 +163,11 @@ med_norm_adat <- medianNormalize(adat, reference = ref_data)
 med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
 
 # Normalize only specific sample types
-# Use when you want to preserve original values for certain sample types
 med_norm_adat <- medianNormalize(adat,
                                  do_field = "SampleType",
                                  do_regexp = "Sample")
 
-# Re-normalize data that has already been median normalized
+# Re-normalize data that has already been median or ANML normalized
 # Use when you want to apply different normalization to previously normalized data
 med_norm_adat <- medianNormalize(adat,
                                  reverse_existing = TRUE,
diff --git a/tests/testthat/_snaps/medianNormalize.md b/tests/testthat/_snaps/medianNormalize.md
index 390f7f7c..1c6b9627 100644
--- a/tests/testthat/_snaps/medianNormalize.md
+++ b/tests/testthat/_snaps/medianNormalize.md
@@ -11,13 +11,7 @@
       Three dilution setup detected (standard setup).
     Message
       > Normalization scale factors already exist: "NormScale_20, NormScale_0_005, NormScale_0_5" - they will be replaced with new scale factors
-      > Building internal reference from field: "SampleType" with values: "QC" and "Sample"
-      > Performing grouped median normalization by: "SampleType" (2 groups)
-      > Processing group: "Sample" (2 samples)
-      v Processing dilution '0_005' with 50 analytes
-      v Processing dilution '0_5' with 50 analytes
-      v Processing dilution '20' with 50 analytes
-      > Processing group: "QC" (1 samples)
+      > Building internal reference from field: "SampleType" with values: "Sample"
       v Processing dilution '0_005' with 50 analytes
       v Processing dilution '0_5' with 50 analytes
       v Processing dilution '20' with 50 analytes
diff --git a/tests/testthat/test-medianNormalize.R b/tests/testthat/test-medianNormalize.R
index afc8ebcf..f8ad5541 100644
--- a/tests/testthat/test-medianNormalize.R
+++ b/tests/testthat/test-medianNormalize.R
@@ -59,9 +59,9 @@ test_that("`medianNormalize` Method 1: Internal reference (default)", {
   expect_true(all(norm_cols %in% names(result)))
 
   # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(0.971895, 1.029779, 1), tolerance = 0.000001)
-  expect_equal(result$NormScale_0_005[1:3], c(1.002922, 0.997176, 1), tolerance = 0.000001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.973205, 1.028323, 1), tolerance = 0.000001)
+  expect_equal(result$NormScale_20[1:3], c(0.971895, 1.029779, 1.199944), tolerance = 0.000001)
+  expect_equal(result$NormScale_0_005[1:3], c(1.002922, 0.997176, 1.058289), tolerance = 0.000001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.973205, 1.028323, 1.104745), tolerance = 0.000001)
 
   # Test specific SeqId columns
   expect_equal(result$seq.10000.28[1:3], c(463.1, 488.5, 501.5))
@@ -69,19 +69,19 @@ test_that("`medianNormalize` Method 1: Internal reference (default)", {
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
-  expect_true(grepl("medNormInt", result_header$ProcessSteps))
+  expect_true(grepl("MedNormSMP", result_header$ProcessSteps))
   expect_equal(result_header$NormalizationAlgorithm, "MedNorm")
   expect_true(grepl("intraplate, crossplate", result_header$MedNormReference))
 })
 
 test_that("`medianNormalize` Method 2: Reference from specific samples", {
 
-  # Test with specific reference field and value
+  # Test with specific reference field and value - using QC as reference but normalizing both QC and Sample
   expect_no_error(
     result <- medianNormalize(test_data,
                               ref_field = "SampleType",
                               ref_value = "QC",
-                              do_regexp = "QC",
+                              do_regexp = "QC|Sample",  # Normalize both types
                               verbose = FALSE)
   )
 
@@ -90,14 +90,14 @@ test_that("`medianNormalize` Method 2: Reference from specific samples", {
   norm_cols <- grep("^NormScale_", names(result), value = TRUE)
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
-  # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(1.0369358, 0.9602251, 1), tolerance = 0.000001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.8570162, 0.8485842, 1), tolerance = 0.000001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.7771749, 0.8520195, 1), tolerance = 0.000001)
+  # Test scale factor output - normalized both QC and Sample types, so QC (3rd) gets scale factor 1.0
+  expect_equal(result$NormScale_20[1:3], c(0.9089, 0.9578, 1.0000), tolerance = 0.001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.9783, 0.9806, 1.0000), tolerance = 0.001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.9879, 1.1445, 1.0000), tolerance = 0.001)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 501.5))
-  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 510.1))
+  expect_equal(result$seq.10000.28[1:3], c(433.1, 454.4, 501.5), tolerance = 0.1)
+  expect_equal(result$seq.10008.43[1:3], c(510.6, 519.0, 510.1), tolerance = 0.1)
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
@@ -118,13 +118,13 @@ test_that("`medianNormalize` Method 3: Reference from another ADAT", {
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
   # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(0.9718953, 1.0297787, 1.0841026), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(1.0029224, 0.9971762, 1.0548762), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.9732045, 1.0283228, 0.9601410), tolerance = 0.0001)
+  expect_equal(result$NormScale_20[1:3], c(0.9719, 1.0298, 1.1999), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(1.0029, 0.9972, 1.0583), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.9732, 1.0283, 1.1047), tolerance = 0.0001)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(463.1, 488.5, 543.7))
-  expect_equal(result$seq.10008.43[1:3], c(546, 558, 553))
+  expect_equal(result$seq.10000.28[1:3], c(463.1, 488.5, 501.5), tolerance = 0.1)
+  expect_equal(result$seq.10008.43[1:3], c(546, 558, 510.1), tolerance = 0.1)
 
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
@@ -132,52 +132,12 @@ test_that("`medianNormalize` Method 3: Reference from another ADAT", {
   expect_true(grepl("crossplate", result_header$MedNormReference))
 })
 
-test_that("`medianNormalize` Method 4: External reference file", {
-  # Create a temporary CSV file with reference data
-  # Use realistic dilution groups from example_data: "20", "0.5", "0.005", "0"
-  ref_data <- data.frame(
-    Dilution = c("20", "0_5", "0_005", "0"),
-    Reference = c(2500.5, 1800.2, 3200.8, 1500.0),
-    stringsAsFactors = FALSE
-  )
-
-  # Create temporary CSV file
-  temp_csv <- tempfile(fileext = ".csv")
-  write.csv(ref_data, temp_csv, row.names = FALSE)
-
-  expect_no_error(
-    result <- medianNormalize(test_data, reference = temp_csv, verbose = FALSE)
-  )
-
-  # Check result structure
-  expect_true(is.soma_adat(result))
-  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
-  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
-
-  # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(3.31, 3.71, 4.21), tolerance = 0.01)
-  expect_equal(result$NormScale_0_005[1:3], c(0.577, 0.561, 0.561), tolerance = 0.001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.90, 1.18, 1.03), tolerance = 0.01)
-
-  # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(1579.0, 1759.9, 2109.7))
-  expect_equal(result$seq.10008.43[1:3], c(1861.7, 2010.3, 2145.9))
-
-  # Check header metadata
-  result_header <- attr(result, "Header.Meta")$HEADER
-  expect_true(grepl(basename(temp_csv), result_header$MedNormReference))
-  expect_true(grepl("crossplate", result_header$MedNormReference))
-
-  # Clean up
-  unlink(temp_csv)
-})
-
-test_that("`medianNormalize` Method 5: External reference as data.frame", {
-
-  # Create reference data.frame with realistic dilution groups
+test_that("`medianNormalize` External reference data.frame", {
+  # Create reference data.frame with SeqId-Reference format
+  analyte_info <- getAnalyteInfo(test_data)
   ref_data <- data.frame(
-    Dilution = c("20", "0_5", "0_005", "0"),
-    Reference = c(2500.5, 1800.2, 3200.8, 1500.0),
+    SeqId = analyte_info$SeqId[1:15],  # Use first 15 SeqIds
+    Reference = runif(15, 2000, 4000),  # Random reference values
     stringsAsFactors = FALSE
   )
 
@@ -190,19 +150,19 @@ test_that("`medianNormalize` Method 5: External reference as data.frame", {
   norm_cols <- grep("^NormScale_", names(result), value = TRUE)
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
-  # Test scale factor output
-  expect_equal(result$NormScale_20[1:3], c(3.31, 3.71, 4.21), tolerance = 0.01)
-  expect_equal(result$NormScale_0_005[1:3], c(0.577, 0.561, 0.561), tolerance = 0.001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.90, 1.18, 1.03), tolerance = 0.01)
-
-  # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(1579.0, 1759.9, 2109.7))
-  expect_equal(result$seq.10008.43[1:3], c(1861.7, 2010.3, 2145.9))
-
   # Check header metadata
   result_header <- attr(result, "Header.Meta")$HEADER
   expect_true(grepl("external_data", result_header$MedNormReference))
   expect_true(grepl("crossplate", result_header$MedNormReference))
+
+  # Check that medNormSMP_ReferenceRFU field was added with proper formatting
+  result_analyte_info <- getAnalyteInfo(result)
+  expect_true("medNormSMP_ReferenceRFU" %in% names(result_analyte_info))
+
+  # Check reference values are rounded to 2 decimal places
+  ref_values <- result_analyte_info$medNormSMP_ReferenceRFU
+  rounded_values <- round(ref_values, 2)
+  expect_equal(ref_values[!is.na(ref_values)], rounded_values[!is.na(rounded_values)])
 })
 
 test_that("`medianNormalize` validates input requirements", {
@@ -241,36 +201,48 @@ test_that("`medianNormalize` validates input requirements", {
   )
 
   # Test with invalid reference data.frame (missing required columns)
-  invalid_ref <- data.frame(Wrong = c("20", "0_5"), Column = c(1000, 2000))
+  invalid_ref <- data.frame(Wrong = c("10000-28", "10001-7"), Column = c(1000, 2000))
   expect_error(
     medianNormalize(test_data, reference = invalid_ref, verbose = FALSE),
-    "Reference data must contain columns: Dilution, Reference"
+    "Reference data must contain 'SeqId' and 'Reference' columns"
   )
 
-  # Test with non-existent reference file
+  # Test with invalid reference type (non-data.frame)
   expect_error(
-    medianNormalize(test_data, reference = "non_existent_file.csv", verbose = FALSE),
-    "Reference file not found"
+    medianNormalize(test_data, reference = "invalid_string", verbose = FALSE),
+    "Invalid reference type"
   )
 
-  # Test with invalid reference type
+  # Test with numeric input
   expect_error(
     medianNormalize(test_data, reference = 123, verbose = FALSE),
     "Invalid reference type"
   )
 })
 
-test_that("`medianNormalize` external reference file validation", {
-  # Test with missing dilution groups in reference
-  incomplete_ref <- data.frame(
-    Dilution = c("20", "0_5"),  # Missing "0_005" and "0"
+test_that("`medianNormalize` external reference data.frame validation", {
+  # Test with invalid SeqId format (should error when no matches found)
+  invalid_seqid_ref <- data.frame(
+    SeqId = c("invalid_format_1", "invalid_format_2"),  # Invalid SeqId format
     Reference = c(2500, 1800),
     stringsAsFactors = FALSE
   )
 
   expect_error(
-    medianNormalize(test_data, reference = incomplete_ref, verbose = FALSE),
-    "Missing reference values for dilution groups"
+    medianNormalize(test_data, reference = invalid_seqid_ref, verbose = FALSE),
+    "No matching SeqIds or dilution groups found in reference data"
+  )
+
+  # Test with empty reference data
+  empty_ref <- data.frame(
+    SeqId = character(0),
+    Reference = numeric(0),
+    stringsAsFactors = FALSE
+  )
+
+  expect_error(
+    medianNormalize(test_data, reference = empty_ref, verbose = FALSE),
+    "No matching SeqIds or dilution groups found in reference data"
   )
 })
 
@@ -306,13 +278,13 @@ test_that("`medianNormalize` handles grouping correctly", {
     expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
     # Test scale factor output
-    expect_equal(result2$NormScale_20[1:3], c(1.054, 1.060, 1.069), tolerance = 0.001)
-    expect_equal(result2$NormScale_0_005[1:3], c(1.0125, 1.0054, 0.9993), tolerance = 0.001)
-    expect_equal(result2$NormScale_0_5[1:3], c(1.080, 1.057, 1.074), tolerance = 0.001)
+    expect_equal(result2$NormScale_20[1:3], c(1.0678, 1.0571, 1.0917), tolerance = 0.001)
+    expect_equal(result2$NormScale_0_005[1:3], c(1.0225, 1.0012, 1.0122), tolerance = 0.001)
+    expect_equal(result2$NormScale_0_5[1:3], c(1.1031, 1.0544, 1.1087), tolerance = 0.001)
 
     # Test specific SeqId columns
-    expect_equal(result2$seq.10000.28[1:3], c(502.1, 502.7, 444.1))
-    expect_equal(result2$seq.10008.43[1:3], c(591.9, 574.2, 453.0))
+    expect_equal(result2$seq.10000.28[1:3], c(508.8, 501.5, 453.7))
+    expect_equal(result2$seq.10008.43[1:3], c(599.9, 572.8, 462.8))
   }
 
   # Test error with non-existent grouping column
@@ -338,13 +310,13 @@ test_that("`medianNormalize` handles sample selection correctly", {
   expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
 
   # Test scale factor output - same as Method 2 for QC selection
-  expect_equal(result$NormScale_20[1:3], c(1.036936, 0.960225, 1), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.857016, 0.848584, 1), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.777175, 0.85202, 1), tolerance = 0.0001)
+  expect_equal(result$NormScale_20[1:3], c(1.036936, 0.960225, 1.08410), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_005[1:3], c(0.857016, 0.848584, 1.05488), tolerance = 0.0001)
+  expect_equal(result$NormScale_0_5[1:3], c(0.777175, 0.85202, 0.96014), tolerance = 0.0001)
 
   # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 501.5))
-  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 510.1))
+  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 543.7))
+  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 553.0))
 
   # Test with non-existent field
   expect_error(
@@ -371,3 +343,69 @@ test_that("`medianNormalize` produces expected verbose output", {
     NA  # NA indicates no output expected
   )
 })
+
+test_that("`medianNormalize` can reverse ANML normalization", {
+  # Create ANML-like test data by modifying test_data
+  anml_test_data <- test_data
+
+  # Modify header to simulate ANML-processed data
+  header_meta <- attr(anml_test_data, "Header.Meta")
+  header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt (SampleId), plateScale, Calibration, anmlQC, qcCheck, anmlSMP"
+  header_meta$HEADER$NormalizationAlgorithm <- "ANML"
+  attr(anml_test_data, "Header.Meta") <- header_meta
+
+  # Add ANML-like normalization scale factors (non-1.0 values to simulate ANML)
+  anml_test_data$NormScale_20 <- c(1.15, 0.95, 1.08)
+  anml_test_data$NormScale_0_5 <- c(1.22, 0.88, 1.12)
+  anml_test_data$NormScale_0_005 <- c(0.93, 1.07, 0.98)
+
+  # Add ANMLFractionUsed columns to mimic ANML data
+  anml_test_data$ANMLFractionUsed_20 <- c(0.85, 0.92, 0.78)
+  anml_test_data$ANMLFractionUsed_0_5 <- c(0.88, 0.91, 0.83)
+  anml_test_data$ANMLFractionUsed_0_005 <- c(0.90, 0.87, 0.85)
+
+  # Test ANML reversal
+  expect_no_error(
+    result <- medianNormalize(anml_test_data, reverse_existing = TRUE, verbose = FALSE)
+  )
+
+  # Check that result is valid
+  expect_true(is.soma_adat(result))
+
+  # Check that ProcessSteps includes reversal and new normalization
+  result_header <- attr(result, "Header.Meta")$HEADER
+  expect_true(grepl("rev-anmlSMP", result_header$ProcessSteps))
+  expect_true(grepl("MedNormSMP", result_header$ProcessSteps))
+
+  # Test that error occurs without reverse_existing flag
+  expect_error(
+    medianNormalize(anml_test_data, reverse_existing = FALSE, verbose = FALSE),
+  )
+})
+
+test_that("`medianNormalize` handles SeqId reference format properly", {
+  # Test with SeqId-Reference format
+  analytes <- getAnalytes(test_data)
+  seqids <- getAnalyteInfo(test_data)$SeqId[1:10]  # First 10 SeqIds
+
+  ref_data <- data.frame(
+    SeqId = seqids,
+    Reference = runif(10, 1000, 5000)  # Random reference values
+  )
+
+  expect_no_error(
+    result <- medianNormalize(test_data, reference = ref_data, verbose = FALSE)
+  )
+
+  # Check that result is valid
+  expect_true(is.soma_adat(result))
+
+  # Check that medNormSMP_ReferenceRFU field was added with 2 decimal places
+  analyte_info <- getAnalyteInfo(result)
+  expect_true("medNormSMP_ReferenceRFU" %in% names(analyte_info))
+
+  # Check reference values are rounded to 2 decimal places
+  ref_values <- analyte_info$medNormSMP_ReferenceRFU
+  rounded_values <- round(ref_values, 2)
+  expect_equal(ref_values[!is.na(ref_values)], rounded_values[!is.na(rounded_values)])
+})

From 959a7236b03191cd29a2b1201e41cb624de92787 Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Mon, 9 Feb 2026 12:46:12 -0500
Subject: [PATCH 06/10] Remove sample selection parameters, use study samples
 only

- removed sample selection parameters (ref_field, ref_value,
  do_field, do_regexp) in favor of simplification in alignment
  with default of using study samples only
---
 R/medianNormalize.R                      | 99 +++++++++---------------
 man/medianNormalize.Rd                   | 30 +------
 tests/testthat/_snaps/medianNormalize.md |  2 +-
 tests/testthat/test-medianNormalize.R    | 77 ++----------------
 4 files changed, 44 insertions(+), 164 deletions(-)

diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index a74ba896..bf1aac89 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -85,17 +85,11 @@
 #'     \item{Reference}{Numeric column containing the reference RFU values
 #'       for each SeqId.}
 #'   }
-#' @param ref_field Character. Field used to select reference samples when
-#'   `reference = NULL`. Default is `"SampleType"`.
-#' @param ref_value Character. Value(s) in `ref_field` to use as reference
-#'   when `reference = NULL`. Default is `c("Sample")`.
 #' @param by Character vector. Grouping variable(s) for grouped median
 #'   normalization. Must be column name(s) in the ADAT. Normalization will be
-#'   performed within each group separately. Default is `"SampleType"`.
-#' @param do_field Character. The field used to select samples for normalization
-#'   (others keep original values). Default is `"SampleType"`.
-#' @param do_regexp Character. A regular expression pattern to select samples
-#'   from `do_field` for normalization. Default is `"Sample"`.
+#'   performed within each group separately. Default is `"SampleType"`. Note
+#'   that only study samples (SampleType == 'Sample') are normalized; QC,
+#'   Calibrator, and Buffer samples are automatically excluded.
 #' @param reverse_existing Logical. Should existing median or ANML normalization be
 #'   reversed before applying new normalization? When `TRUE`, existing median
 #'   normalization scale factors or ANML normalization effects are reversed for
@@ -111,11 +105,6 @@
 #' # Internal reference from study samples (default)
 #' med_norm_adat <- medianNormalize(adat)
 #'
-#' # Reference from specific samples in the ADAT
-#' med_norm_adat <- medianNormalize(adat,
-#'                                  ref_field = "SampleType",
-#'                                  ref_value = "Sample")
-#'
 #' # Reference from another ADAT
 #' ref_adat <- read_adat("reference_file.adat")
 #' med_norm_adat <- medianNormalize(adat, reference = ref_adat)
@@ -129,11 +118,6 @@
 #' # (normalize within groups to account for expected biological differences)
 #' med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
 #'
-#' # Normalize only specific sample types
-#' med_norm_adat <- medianNormalize(adat,
-#'                                  do_field = "SampleType",
-#'                                  do_regexp = "Sample")
-#'
 #' # Re-normalize data that has already been median or ANML normalized
 #' # Use when you want to apply different normalization to previously normalized data
 #' med_norm_adat <- medianNormalize(adat,
@@ -145,11 +129,7 @@
 #' @export
 medianNormalize <- function(adat,
                             reference = NULL,
-                            ref_field = "SampleType",
-                            ref_value = c("Sample"),
                             by = "SampleType",
-                            do_field = "SampleType",
-                            do_regexp = "Sample",
                             reverse_existing = FALSE,
                             verbose = TRUE) {
 
@@ -243,52 +223,47 @@ medianNormalize <- function(adat,
   }
 
 
-  # Determine which samples to normalize ----
-  if (!is.null(do_field) && !is.null(do_regexp)) {
-    if (!do_field %in% names(adat)) {
-      stop("Field `", do_field, "` not found in adat columns", call. = FALSE)
-    }
+  # Determine which samples to normalize - only Sample types
+  if (!"SampleType" %in% names(adat)) {
+    stop("Field 'SampleType' not found in adat columns", call. = FALSE)
+  }
 
-    do_samples <- grep(do_regexp, adat[[do_field]])
-    if (length(do_samples) == 0L) {
-      stop(
-        "No samples selected for normalization with pattern: ", do_regexp,
-        call. = FALSE
-      )
-    }
-    dont_samples <- setdiff(seq_len(nrow(adat)), do_samples)
-  } else {
-    do_samples <- seq_len(nrow(adat))
-    dont_samples <- NULL
+  do_samples <- grep("Sample", adat[["SampleType"]])
+  if (length(do_samples) == 0L) {
+    stop(
+      "No samples selected for normalization with pattern: Sample",
+      call. = FALSE
+    )
   }
+  dont_samples <- setdiff(seq_len(nrow(adat)), do_samples)
 
   # Process reference ----
   if (is.null(reference)) {
-    # Check if ref_field conflicts with grouping variables
+    # Check if SampleType conflicts with grouping variables
     conflicts_with_grouping <- FALSE
-    if (ref_field %in% by) {
+    if ("SampleType" %in% by) {
       samples_to_normalize <- adat[do_samples, ]
-      group_values <- unique(samples_to_normalize[[ref_field]])
+      group_values <- unique(samples_to_normalize[["SampleType"]])
       group_values <- group_values[!is.na(group_values)]
-      all_groups_in_ref <- all(group_values %in% ref_value)
-      conflicts_with_grouping <- !identical(by, ref_field) || !all_groups_in_ref
+      all_groups_in_ref <- all(group_values %in% "Sample")
+      conflicts_with_grouping <- !identical(by, "SampleType") || !all_groups_in_ref
     }
 
     if (conflicts_with_grouping) {
       # Calculate global reference to avoid groups lacking reference samples
       if (verbose) {
-        .todo("Building global internal reference from field: {.val {ref_field}} with values: {.val {ref_value}}")
+        .todo("Building global internal reference from study samples (SampleType == 'Sample')")
       }
-      ref_data <- .buildInternalReference(adat, ref_field, ref_value, dil_groups)
+      ref_data <- .buildInternalReference(adat, dil_groups)
     } else {
       # Standard internal reference - calculate per group
       ref_data <- NULL
       if (verbose) {
-        .todo("Building internal reference from field: {.val {ref_field}} with values: {.val {ref_value}}")
+        .todo("Building internal reference from study samples (SampleType == 'Sample')")
       }
     }
   } else {
-    ref_data <- .processReference(reference, ref_field, ref_value, adat, dil_groups, apt_data, verbose)
+    ref_data <- .processReference(reference, adat, dil_groups, apt_data, verbose)
   }
 
   # Add row identifier to maintain order
@@ -301,8 +276,6 @@ medianNormalize <- function(adat,
       dil_groups = dil_groups,
       by = by,
       ref_data = ref_data,
-      ref_field = ref_field,
-      ref_value = ref_value,
       verbose = verbose
     )
   }
@@ -347,7 +320,7 @@ medianNormalize <- function(adat,
 
 #' Process Reference Data
 #' @noRd
-.processReference <- function(reference, ref_field, ref_value, adat, dil_groups, apt_data, verbose) {
+.processReference <- function(reference, adat, dil_groups, apt_data, verbose) {
 
   if (is.soma_adat(reference)) {
     # Use reference from provided ADAT
@@ -373,18 +346,17 @@ medianNormalize <- function(adat,
 
 #' Build Internal Reference from Study Samples
 #' @noRd
-.buildInternalReference <- function(adat, ref_field, ref_value, dil_groups) {
+.buildInternalReference <- function(adat, dil_groups) {
 
-  if (!ref_field %in% names(adat)) {
-    stop("Reference field `", ref_field, "` not found", call. = FALSE)
+  if (!"SampleType" %in% names(adat)) {
+    stop("Reference field 'SampleType' not found", call. = FALSE)
   }
 
   # Select reference samples
-  ref_samples <- adat[[ref_field]] %in% ref_value
+  ref_samples <- adat[["SampleType"]] %in% "Sample"
   if (sum(ref_samples) == 0) {
     stop(
-      "No reference samples found with field `", ref_field,
-      "` and values: ", paste(ref_value, collapse = ", "),
+      "No reference samples found with field 'SampleType' and value: Sample",
       call. = FALSE
     )
   }
@@ -487,7 +459,7 @@ medianNormalize <- function(adat,
 
 #' Perform Median Normalization
 #' @noRd
-.performMedianNorm <- function(adat, dil_groups, by, ref_data, ref_field, ref_value, verbose) {
+.performMedianNorm <- function(adat, dil_groups, by, ref_data, verbose) {
 
   # Store original rownames to restore later
   original_rownames <- rownames(adat)
@@ -580,16 +552,15 @@ medianNormalize <- function(adat,
           grp_ref_values <- apply(grp_adat[, dil_apts, drop = FALSE], 2, median, na.rm = TRUE)
         }
       } else {
-        # Internal reference: Use samples from this group only
-        if (!ref_field %in% names(grp_adat)) {
-          stop("Reference field `", ref_field, "` not found", call. = FALSE)
+        # Internal reference: Use Sample types from this group only
+        if (!"SampleType" %in% names(grp_adat)) {
+          stop("Reference field 'SampleType' not found", call. = FALSE)
         }
 
-        ref_samples_mask <- grp_adat[[ref_field]] %in% ref_value
+        ref_samples_mask <- grp_adat[["SampleType"]] %in% "Sample"
         if (sum(ref_samples_mask) == 0) {
           stop(
-            "No reference samples found with field `", ref_field,
-            "` and values: ", paste(ref_value, collapse = ", "),
+            "No reference samples found with field 'SampleType' and value: Sample",
             call. = FALSE
           )
         }
diff --git a/man/medianNormalize.Rd b/man/medianNormalize.Rd
index 34ca2032..88e8fcc5 100644
--- a/man/medianNormalize.Rd
+++ b/man/medianNormalize.Rd
@@ -7,11 +7,7 @@
 medianNormalize(
   adat,
   reference = NULL,
-  ref_field = "SampleType",
-  ref_value = c("Sample"),
   by = "SampleType",
-  do_field = "SampleType",
-  do_regexp = "Sample",
   reverse_existing = FALSE,
   verbose = TRUE
 )
@@ -38,21 +34,11 @@ to those in the \code{soma_adat} object. Must be in "10000-28" format, not
 for each SeqId.}
 }}
 
-\item{ref_field}{Character. Field used to select reference samples when
-\code{reference = NULL}. Default is \code{"SampleType"}.}
-
-\item{ref_value}{Character. Value(s) in \code{ref_field} to use as reference
-when \code{reference = NULL}. Default is \code{c("Sample")}.}
-
 \item{by}{Character vector. Grouping variable(s) for grouped median
 normalization. Must be column name(s) in the ADAT. Normalization will be
-performed within each group separately. Default is \code{"SampleType"}.}
-
-\item{do_field}{Character. The field used to select samples for normalization
-(others keep original values). Default is \code{"SampleType"}.}
-
-\item{do_regexp}{Character. A regular expression pattern to select samples
-from \code{do_field} for normalization. Default is \code{"Sample"}.}
+performed within each group separately. Default is \code{"SampleType"}. Note
+that only study samples (SampleType == 'Sample') are normalized; QC,
+Calibrator, and Buffer samples are automatically excluded.}
 
 \item{reverse_existing}{Logical. Should existing median or ANML normalization be
 reversed before applying new normalization? When \code{TRUE}, existing median
@@ -144,11 +130,6 @@ reversed; QC, Calibrator, and Buffer samples retain their normalization
 # Internal reference from study samples (default)
 med_norm_adat <- medianNormalize(adat)
 
-# Reference from specific samples in the ADAT
-med_norm_adat <- medianNormalize(adat,
-                                 ref_field = "SampleType",
-                                 ref_value = "Sample")
-
 # Reference from another ADAT
 ref_adat <- read_adat("reference_file.adat")
 med_norm_adat <- medianNormalize(adat, reference = ref_adat)
@@ -162,11 +143,6 @@ med_norm_adat <- medianNormalize(adat, reference = ref_data)
 # (normalize within groups to account for expected biological differences)
 med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
 
-# Normalize only specific sample types
-med_norm_adat <- medianNormalize(adat,
-                                 do_field = "SampleType",
-                                 do_regexp = "Sample")
-
 # Re-normalize data that has already been median or ANML normalized
 # Use when you want to apply different normalization to previously normalized data
 med_norm_adat <- medianNormalize(adat,
diff --git a/tests/testthat/_snaps/medianNormalize.md b/tests/testthat/_snaps/medianNormalize.md
index 1c6b9627..a0b80f67 100644
--- a/tests/testthat/_snaps/medianNormalize.md
+++ b/tests/testthat/_snaps/medianNormalize.md
@@ -11,7 +11,7 @@
       Three dilution setup detected (standard setup).
     Message
       > Normalization scale factors already exist: "NormScale_20, NormScale_0_005, NormScale_0_5" - they will be replaced with new scale factors
-      > Building internal reference from field: "SampleType" with values: "Sample"
+      > Building internal reference from study samples (SampleType == 'Sample')
       v Processing dilution '0_005' with 50 analytes
       v Processing dilution '0_5' with 50 analytes
       v Processing dilution '20' with 50 analytes
diff --git a/tests/testthat/test-medianNormalize.R b/tests/testthat/test-medianNormalize.R
index f8ad5541..8abef934 100644
--- a/tests/testthat/test-medianNormalize.R
+++ b/tests/testthat/test-medianNormalize.R
@@ -74,37 +74,7 @@ test_that("`medianNormalize` Method 1: Internal reference (default)", {
   expect_true(grepl("intraplate, crossplate", result_header$MedNormReference))
 })
 
-test_that("`medianNormalize` Method 2: Reference from specific samples", {
-
-  # Test with specific reference field and value - using QC as reference but normalizing both QC and Sample
-  expect_no_error(
-    result <- medianNormalize(test_data,
-                              ref_field = "SampleType",
-                              ref_value = "QC",
-                              do_regexp = "QC|Sample",  # Normalize both types
-                              verbose = FALSE)
-  )
-
-  # Check result structure
-  expect_true(is.soma_adat(result))
-  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
-  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
-
-  # Test scale factor output - normalized both QC and Sample types, so QC (3rd) gets scale factor 1.0
-  expect_equal(result$NormScale_20[1:3], c(0.9089, 0.9578, 1.0000), tolerance = 0.001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.9783, 0.9806, 1.0000), tolerance = 0.001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.9879, 1.1445, 1.0000), tolerance = 0.001)
-
-  # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(433.1, 454.4, 501.5), tolerance = 0.1)
-  expect_equal(result$seq.10008.43[1:3], c(510.6, 519.0, 510.1), tolerance = 0.1)
-
-  # Check header metadata
-  result_header <- attr(result, "Header.Meta")$HEADER
-  expect_true(grepl("intraplate, crossplate", result_header$MedNormReference))
-})
-
-test_that("`medianNormalize` Method 3: Reference from another ADAT", {
+test_that("`medianNormalize` Method 2: Reference from another ADAT", {
   # Create a minimal reference ADAT from sample subset
   ref_adat <- test_data[1:2, ]  # Use first 2 samples as reference
 
@@ -194,10 +164,10 @@ test_that("`medianNormalize` validates input requirements", {
     "No normalization scale factor columns found"
   )
 
-  # Test with invalid reference field
+  # Test with invalid adat object (not soma_adat)
   expect_error(
-    medianNormalize(test_data, ref_field = "NonExistentField", verbose = FALSE),
-    "Reference field `NonExistentField` not found"
+    medianNormalize(data.frame(wrong = 1:3), verbose = FALSE),
+    "`adat` must be a class `soma_adat` object"
   )
 
   # Test with invalid reference data.frame (missing required columns)
@@ -249,7 +219,7 @@ test_that("`medianNormalize` external reference data.frame validation", {
 test_that("`medianNormalize` handles grouping correctly", {
   # Test grouping by Sex
   expect_no_error(
-    result1 <- medianNormalize(test_data, by = "Sex", do_regexp = "Sample", verbose = FALSE)
+    result1 <- medianNormalize(test_data, by = "Sex", verbose = FALSE)
   )
 
   # Check result structure
@@ -294,43 +264,6 @@ test_that("`medianNormalize` handles grouping correctly", {
   )
 })
 
-test_that("`medianNormalize` handles sample selection correctly", {
-  # Test selective normalization
-  expect_no_error(
-    result <- medianNormalize(test_data,
-                              do_field = "SampleType",
-                              do_regexp = "QC",
-                              verbose = FALSE)
-  )
-  expect_true(is.soma_adat(result))
-
-  # Check result structure
-  expect_true(is.soma_adat(result))
-  norm_cols <- grep("^NormScale_", names(result), value = TRUE)
-  expect_equal(length(norm_cols), 3)  # Should have 3 dilution groups
-
-  # Test scale factor output - same as Method 2 for QC selection
-  expect_equal(result$NormScale_20[1:3], c(1.036936, 0.960225, 1.08410), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_005[1:3], c(0.857016, 0.848584, 1.05488), tolerance = 0.0001)
-  expect_equal(result$NormScale_0_5[1:3], c(0.777175, 0.85202, 0.96014), tolerance = 0.0001)
-
-  # Test specific SeqId columns
-  expect_equal(result$seq.10000.28[1:3], c(476.5, 474.4, 543.7))
-  expect_equal(result$seq.10008.43[1:3], c(561.8, 541.9, 553.0))
-
-  # Test with non-existent field
-  expect_error(
-    medianNormalize(test_data, do_field = "NonExistentField", verbose = FALSE),
-    "Field `NonExistentField` not found"
-  )
-
-  # Test with pattern that matches no samples
-  expect_error(
-    medianNormalize(test_data, do_regexp = "NoMatchPattern", verbose = FALSE),
-    "No samples selected for normalization"
-  )
-})
-
 test_that("`medianNormalize` produces expected verbose output", {
   # specific verbose output messages
   expect_snapshot(

From 7e2c44579f546c9e936528a4de61d8ac716083c5 Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Thu, 26 Feb 2026 15:50:21 -0500
Subject: [PATCH 07/10] Export reverseMedianNormalize() function

- removed `reverse_existing` parameter from `medianNormalize()`,
  which now requires an unnormalized adat input
- updated `medianNormalize()` to now point to
  reverseMedianNormalize() if an already normalized
  adat is passed
---
 NAMESPACE                                    |   1 +
 R/medianNormalize.R                          | 421 +++++--------------
 R/reverseMedianNormalize.R                   | 301 +++++++++++++
 man/medianNormalize.Rd                       |  44 +-
 man/reverseMedianNormalize.Rd                |  59 +++
 tests/testthat/_snaps/medianNormalize.md     |   3 +-
 tests/testthat/test-medianNormalize.R        |  24 +-
 tests/testthat/test-reverseMedianNormalize.R |  84 ++++
 8 files changed, 585 insertions(+), 352 deletions(-)
 create mode 100644 R/reverseMedianNormalize.R
 create mode 100644 man/reverseMedianNormalize.Rd
 create mode 100644 tests/testthat/test-reverseMedianNormalize.R

diff --git a/NAMESPACE b/NAMESPACE
index a78cf5cf..f4834c9c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -124,6 +124,7 @@ export(read_adat)
 export(read_annotations)
 export(regexSeqId)
 export(rename)
+export(reverseMedianNormalize)
 export(right_join)
 export(rm_rn)
 export(rn2col)
diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index bf1aac89..856da4f0 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -59,11 +59,9 @@
 #'     maximum likelihood (ANML) method which leverages a population-based
 #'     reference that iteratively down-selects the set of analytes to include
 #'     for the normalization calculation.
-#'   \item The function requires `reverse_existing = TRUE` to be set in order
-#'     to process data where study samples have already undergone ANML or
-#'     standard median normalization.
-#'   \item When reversing existing normalization, only study samples are
-#'     reversed; QC, Calibrator, and Buffer samples retain their normalization
+#'   \item This function requires unnormalized data as input. If study samples
+#'     have already undergone median normalization (ANML or standard), first use
+#'     `reverseMedianNormalize()` to remove existing normalization.
 #' }
 #'
 #' @param adat A `soma_adat` object created using [read_adat()], containing
@@ -90,39 +88,34 @@
 #'   performed within each group separately. Default is `"SampleType"`. Note
 #'   that only study samples (SampleType == 'Sample') are normalized; QC,
 #'   Calibrator, and Buffer samples are automatically excluded.
-#' @param reverse_existing Logical. Should existing median or ANML normalization be
-#'   reversed before applying new normalization? When `TRUE`, existing median
-#'   normalization scale factors or ANML normalization effects are reversed for
-#'   study samples only (QC, Calibrator, and Buffer samples retain their
-#'   normalization). This allows re-normalization of data that has already been
-#'   median normalized or ANML normalized. Default is `FALSE`.
 #' @param verbose Logical. Should progress messages be printed? Default is `TRUE`.
 #' @return A `soma_adat` object with median normalization applied and RFU values
 #'   adjusted. The existing `NormScale_*` columns are updated to include the
 #'   effects of both plate scale normalization and median normalization.
 #' @examples
 #' \dontrun{
+#' # Starting with unnormalized ADAT
+#' unnormalized_adat <- read_adat("unnormalized_study_data.adat")
+#'
 #' # Internal reference from study samples (default)
-#' med_norm_adat <- medianNormalize(adat)
+#' med_norm_adat <- medianNormalize(unnormalized_adat)
 #'
 #' # Reference from another ADAT
 #' ref_adat <- read_adat("reference_file.adat")
-#' med_norm_adat <- medianNormalize(adat, reference = ref_adat)
+#' med_norm_adat <- medianNormalize(unnormalized_adat, reference = ref_adat)
 #'
 #' # External reference as a data.frame - requires `SeqId` and `Reference` columns
 #' ref_data <- read.csv("reference_file.csv")
-#' med_norm_adat <- medianNormalize(adat, reference = ref_data)
+#' med_norm_adat <- medianNormalize(unnormalized_adat, reference = ref_data)
 #'
 #' # Custom grouping by multiple variables
 #' # Use when total protein load changes due to analysis conditions
-#' # (normalize within groups to account for expected biological differences)
-#' med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
+#' med_norm_adat <- medianNormalize(unnormalized_adat, by = c("Sex", "SampleType"))
 #'
-#' # Re-normalize data that has already been median or ANML normalized
-#' # Use when you want to apply different normalization to previously normalized data
-#' med_norm_adat <- medianNormalize(adat,
-#'                                  reverse_existing = TRUE,
-#'                                  reference = new_reference)
+#' # If you already have normalized data, first reverse the normalization
+#' normalized_adat <- read_adat("normalized_study_data.adat")
+#' unnormalized_adat <- reverseMedianNormalize(normalized_adat)
+#' custom_norm_adat <- medianNormalize(unnormalized_adat, reference = new_reference)
 #' }
 #' @importFrom dplyr filter
 #' @importFrom stats median
@@ -130,7 +123,6 @@
 medianNormalize <- function(adat,
                             reference = NULL,
                             by = "SampleType",
-                            reverse_existing = FALSE,
                             verbose = TRUE) {
 
   # Input validation ----
@@ -172,26 +164,7 @@ medianNormalize <- function(adat,
   }
 
   # Check data state and existing normalization ----
-  has_existing_norm <- .validateDataState(adat, header, verbose, reverse_existing)
-
-  # Reverse existing normalization if requested ----
-  if (reverse_existing && has_existing_norm) {
-    if (verbose) {
-      if (grepl("ANML", header$ProcessSteps %||% "", ignore.case = TRUE)) {
-        cat("Reversing existing ANML normalization for study samples...\n")
-        adat <- .reverseANMLSMP(adat, verbose)
-      } else {
-        cat("Reversing existing median normalization for study samples...\n")
-        adat <- .reverseMedNormSMP(adat, verbose)
-      }
-    } else {
-      if (grepl("ANML", header$ProcessSteps %||% "", ignore.case = TRUE)) {
-        adat <- .reverseANMLSMP(adat, verbose)
-      } else {
-        adat <- .reverseMedNormSMP(adat, verbose)
-      }
-    }
-  }
+  .validateDataState(adat, header, verbose)
 
   # Create dilution groups ----
   apt_data <- getAnalyteInfo(adat)
@@ -257,10 +230,13 @@ medianNormalize <- function(adat,
       ref_data <- .buildInternalReference(adat, dil_groups)
     } else {
       # Standard internal reference - calculate per group
-      ref_data <- NULL
+      # Build internal reference for adding to metadata
       if (verbose) {
         .todo("Building internal reference from study samples (SampleType == 'Sample')")
       }
+      ref_data <- .buildInternalReference(adat, dil_groups)
+      # Mark as group-specific for normalization calculations
+      attr(ref_data, "group_specific") <- TRUE
     }
   } else {
     ref_data <- .processReference(reference, adat, dil_groups, apt_data, verbose)
@@ -318,6 +294,76 @@ medianNormalize <- function(adat,
 }
 
 
+#' Recalculate RowCheck after Median Normalization
+#'
+#' Recalculates RowCheck values as PASS or FLAG based on normalization acceptance
+#' criteria for row scale factors after median normalization. Samples with all
+#' row scale factors within the acceptance range (0.4 to 2.5) receive "PASS",
+#' while samples with any scale factor outside this range receive "FLAG".
+#'
+#' @param adat A `soma_adat` object after median normalization
+#' @param verbose Logical. Whether to print progress messages
+#' @return The `soma_adat` object with updated RowCheck values
+#' @noRd
+.recalculateRowCheck <- function(adat, verbose) {
+
+  if (verbose) {
+    cat("Recalculating RowCheck values based on normalization acceptance criteria...\n")
+  }
+
+  # Check if RowCheck column exists
+  if (!"RowCheck" %in% names(adat)) {
+    if (verbose) {
+      cat("No RowCheck column found to recalculate.\n")
+    }
+    return(adat)
+  }
+
+  # Find all normalization scale factor columns (NormScale_*)
+  scale_factor_cols <- grep("^NormScale_", names(adat), value = TRUE)
+
+  if (length(scale_factor_cols) == 0) {
+    if (verbose) {
+      cat("No normalization scale factor columns found. Setting all RowCheck to PASS.\n")
+    }
+    adat$RowCheck <- "PASS"
+    return(adat)
+  }
+
+  # Define acceptance criteria range for row scale factors
+  min_scale <- 0.4
+  max_scale <- 2.5
+
+  # Calculate RowCheck for each sample
+  for (i in seq_len(nrow(adat))) {
+    # Get all scale factor values for this sample
+    scale_values <- as.numeric(adat[i, scale_factor_cols, drop = FALSE])
+    scale_values <- scale_values[!is.na(scale_values)]
+
+    # Check if all scale factors are within acceptance range
+    if (length(scale_values) == 0) {
+      # No scale factors available - default to PASS
+      adat$RowCheck[i] <- "PASS"
+    } else if (all(scale_values >= min_scale & scale_values <= max_scale)) {
+      adat$RowCheck[i] <- "PASS"
+    } else {
+      adat$RowCheck[i] <- "FLAG"
+    }
+  }
+
+  if (verbose) {
+    pass_count <- sum(adat$RowCheck == "PASS", na.rm = TRUE)
+    flag_count <- sum(adat$RowCheck == "FLAG", na.rm = TRUE)
+    cat("RowCheck values updated for", nrow(adat), "samples.\n")
+    cat("  - PASS:", pass_count, "samples\n")
+    cat("  - FLAG:", flag_count, "samples\n")
+    cat("  - Acceptance criteria: scale factors within [", min_scale, ", ", max_scale, "]\n")
+  }
+
+  adat
+}
+
+
 #' Process Reference Data
 #' @noRd
 .processReference <- function(reference, adat, dil_groups, apt_data, verbose) {
@@ -528,7 +574,7 @@ medianNormalize <- function(adat,
       }
 
       # Calculate reference values for this dilution
-      if (!is.null(ref_data) && dil_name %in% names(ref_data)) {
+      if (!is.null(ref_data) && dil_name %in% names(ref_data) && !isTRUE(attr(ref_data, "group_specific"))) {
         # Use external reference
         ref_values <- ref_data[[dil_name]]
 
@@ -618,7 +664,8 @@ medianNormalize <- function(adat,
   if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
     # Add median normalization to process steps
     if ("ProcessSteps" %in% names(header_meta$HEADER)) {
-      if (!grepl("MedNormSMP", header_meta$HEADER$ProcessSteps)) {
+      # Check for MedNormSMP that is not preceded by "rev-"
+      if (!grepl("(?<!rev-)MedNormSMP", header_meta$HEADER$ProcessSteps, perl = TRUE)) {
         header_meta$HEADER$ProcessSteps <- paste(
           header_meta$HEADER$ProcessSteps,
           "MedNormSMP",
@@ -656,31 +703,31 @@ medianNormalize <- function(adat,
 }
 
 
+
+
+
 #' Validate Data State for Median Normalization
 #' @noRd
-.validateDataState <- function(adat, header, verbose, reverse_existing = FALSE) {
+.validateDataState <- function(adat, header, verbose) {
 
   # Check if data is in a standard deliverable state
   process_steps <- header$ProcessSteps %||% ""
 
-  # Check for existing median normalization
-  has_mednorm <- grepl("medNormInt|MedNorm", process_steps, ignore.case = TRUE)
-  has_anml <- grepl("ANML", process_steps, ignore.case = TRUE)
+  # Check for existing median normalization on study samples (MedNormSMP only)
+  has_mednorm_smp <- grepl("MedNormSMP", process_steps, ignore.case = TRUE)
+  has_anml_smp <- grepl("anmlSMP", process_steps, ignore.case = TRUE)
 
-  if (has_mednorm && !reverse_existing) {
-    stop(
-      "Data appears to already be median normalized. ",
-      "Set reverse_existing = TRUE to reverse existing median normalization before applying new normalization. ",
-      "ProcessSteps: ", process_steps,
-      call. = FALSE
-    )
-  }
+  # Check if data has already been denormalized
+  # Look for specific reversal patterns
+  has_mednorm_reversal <- grepl("rev-(?:MedNormSMP|medNormInt|MedNorm)", process_steps, ignore.case = TRUE, perl = TRUE)
+  has_anml_reversal <- grepl("rev-(?:anmlSMP|ANML)", process_steps, ignore.case = TRUE, perl = TRUE)
 
-  if (has_anml && !reverse_existing) {
+  # If study samples have existing normalization, fail unless already properly denormalized
+  if ((has_mednorm_smp || has_anml_smp) && !(has_mednorm_reversal || has_anml_reversal)) {
     stop(
-      "Data appears to be ANML normalized. ",
-      "Set reverse_existing = TRUE to reverse existing ANML normalization before applying new normalization. ",
-      "ProcessSteps: ", process_steps,
+      "Study samples appear to already be normalized (ProcessSteps: ", process_steps, "). ",
+      "For medianNormalize(), please start with unnormalized data. ",
+      "If you only have normalized data, first use reverseMedianNormalize() to remove existing normalization.",
       call. = FALSE
     )
   }
@@ -688,13 +735,14 @@ medianNormalize <- function(adat,
   # Check for required normalization steps
   has_hyb <- grepl("Hyb|hybridization", process_steps, ignore.case = TRUE)
   has_plate_scale <- grepl("PlateScale|plate.?scale", process_steps, ignore.case = TRUE)
+  has_mednorm_int <- grepl("medNormInt", process_steps, ignore.case = TRUE)
 
   # Warn if not in standard deliverable state
-  if (!has_hyb || !has_plate_scale) {
+  if (!has_hyb || !has_plate_scale || !has_mednorm_int) {
     warning(
       "Data may not be in standard deliverable format. ",
       "Standard format requires hybridization normalization, median normalization of controls ",
-      "(buffer + calibrator), and plate scale normalization before applying median normalization. ",
+      "(medNormInt), and plate scale normalization before applying median normalization. ",
       "Current ProcessSteps: ", process_steps,
       call. = FALSE
     )
@@ -704,11 +752,12 @@ medianNormalize <- function(adat,
     cat("Data validation passed for median normalization.\n")
     cat("Standard deliverable checks:\n")
     cat("  - Hybridization normalization:", if(has_hyb) "PASS" else "WARN", "\n")
+    cat("  - medNormInt (controls):", if(has_mednorm_int) "PASS" else "WARN", "\n")
     cat("  - Plate scale normalization:", if(has_plate_scale) "PASS" else "WARN", "\n")
-    cat("  - No existing MedNorm/ANML:", if(!has_mednorm && !has_anml) "PASS" else if(reverse_existing && has_mednorm) "WARN (will reverse)" else "FAIL", "\n")
+    cat("  - Study samples not already normalized:", "PASS", "\n")
   }
 
-  return(has_mednorm || has_anml)
+  invisible(NULL)
 }
 
 
@@ -789,247 +838,5 @@ medianNormalize <- function(adat,
 }
 
 
-#' Reverse Existing Median Normalization for Study Samples
-#' @noRd
-.reverseMedNormSMP <- function(adat, verbose) {
-
-  # Get existing scale factors
-  sf_cols <- grep("^NormScale_", names(adat), value = TRUE)
-
-  if (length(sf_cols) == 0) {
-    if (verbose) {
-      cat("No existing median normalization scale factors found to reverse.\n")
-    }
-    return(adat)
-  }
-
-  if (verbose) {
-    cat("Reversing existing median normalization for study samples...\n")
-  }
-
-  # Get dilution groups
-  apt_data <- getAnalyteInfo(adat)
-  dil_groups <- split(apt_data$AptName, apt_data$Dilution)
-  names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
-  names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
-
-  # Only reverse for study samples, leave QC/Calibrator/Buffer alone
-  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
-
-  if (sum(sample_mask) == 0) {
-    if (verbose) {
-      cat("No study samples found to reverse normalization.\n")
-    }
-    return(adat)
-  }
-
-  # For each dilution group, reverse normalization
-  for (dil_name in names(dil_groups)) {
-    sf_col <- paste0("NormScale_", dil_name)
-
-    if (sf_col %in% names(adat)) {
-      dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
-
-      if (length(dil_apts) > 0) {
-        for (i in which(sample_mask)) {
-          scale_factor <- adat[[sf_col]][i]
-          if (!is.na(scale_factor) && scale_factor != 0) {
-            # Apply inverse of scale factor
-            adat[i, dil_apts] <- adat[i, dil_apts] / scale_factor
-            # Reset scale factor to 1.0
-            adat[[sf_col]][i] <- 1.0
-          }
-        }
-      }
-    }
-  }
-
-  # Update ProcessSteps to indicate reversal
-  header_meta <- attr(adat, "Header.Meta")
-  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
-    current_steps <- header_meta$HEADER$ProcessSteps %||% ""
-
-    # Add reversal step - look for what type of median norm was reversed
-    if (grepl("MedNormSMP", current_steps, ignore.case = TRUE)) {
-      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-MedNormSMP", sep = ", ")
-    } else if (grepl("medNormInt", current_steps, ignore.case = TRUE)) {
-      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-medNormInt", sep = ", ")
-    } else {
-      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-MedNorm", sep = ", ")
-    }
-
-    attr(adat, "Header.Meta") <- header_meta
-  }
-
-  if (verbose) {
-    cat("Median normalization reversed for", sum(sample_mask), "study samples.\n")
-  }
-
-  adat
-}
-
-
-#' Reverse Existing ANML Normalization for Study Samples
-#' @noRd
-.reverseANMLSMP <- function(adat, verbose) {
-
-  # Get existing scale factors
-  sf_cols <- grep("^NormScale_", names(adat), value = TRUE)
-
-  if (length(sf_cols) == 0) {
-    if (verbose) {
-      cat("No existing ANML normalization scale factors found to reverse.\n")
-    }
-    return(adat)
-  }
-
-  if (verbose) {
-    cat("Reversing existing ANML normalization for study samples...\n")
-  }
-
-  # Get dilution groups
-  apt_data <- getAnalyteInfo(adat)
-  dil_groups <- split(apt_data$AptName, apt_data$Dilution)
-  names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
-  names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
-
-  # Only reverse for study samples, leave QC/Calibrator/Buffer alone
-  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
 
-  if (sum(sample_mask) == 0) {
-    if (verbose) {
-      cat("No study samples found to reverse ANML normalization.\n")
-    }
-    return(adat)
-  }
 
-  # For each dilution group, reverse ANML normalization using log space
-  for (dil_name in names(dil_groups)) {
-    sf_col <- paste0("NormScale_", dil_name)
-
-    if (sf_col %in% names(adat)) {
-      dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
-
-      if (length(dil_apts) > 0) {
-        for (i in which(sample_mask)) {
-          scale_factor <- adat[[sf_col]][i]
-          if (!is.na(scale_factor) && scale_factor != 0) {
-            # ANML uses log space scaling - reverse by subtracting log scale factor
-            log_sf <- log10(scale_factor)
-            log_data <- log10(as.numeric(adat[i, dil_apts]))
-            reversed_log_data <- log_data - log_sf
-            adat[i, dil_apts] <- 10^reversed_log_data
-            # Reset scale factor to 1.0
-            adat[[sf_col]][i] <- 1.0
-          }
-        }
-      }
-    }
-  }
-
-  # Remove ANMLFractionUsed columns if they exist
-  anml_frac_cols <- grep("^ANMLFractionUsed_", names(adat), value = TRUE)
-  if (length(anml_frac_cols) > 0) {
-    for (col in anml_frac_cols) {
-      if (all(is.na(adat[[col]][sample_mask]))) {
-        adat[[col]][sample_mask] <- NA
-      }
-    }
-  }
-
-  # Update ProcessSteps to indicate reversal
-  header_meta <- attr(adat, "Header.Meta")
-  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
-    current_steps <- header_meta$HEADER$ProcessSteps %||% ""
-
-    # Add reversal step - look for what type of ANML was reversed
-    if (grepl("anmlSMP", current_steps, ignore.case = TRUE)) {
-      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-anmlSMP", sep = ", ")
-    } else if (grepl("ANML", current_steps, ignore.case = TRUE)) {
-      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-ANML", sep = ", ")
-    }
-
-    attr(adat, "Header.Meta") <- header_meta
-  }
-
-  # Reset RowCheck for reversed samples
-  if ("RowCheck" %in% names(adat)) {
-    adat$RowCheck[sample_mask] <- "PASS"
-  }
-
-  if (verbose) {
-    cat("ANML normalization reversed for", sum(sample_mask), "study samples.\n")
-  }
-
-  adat
-}
-
-
-#' Recalculate RowCheck after Median Normalization
-#'
-#' Recalculates RowCheck values as PASS or FLAG based on normalization acceptance
-#' criteria for row scale factors after median normalization. Samples with all
-#' row scale factors within the acceptance range (0.4 to 2.5) receive "PASS",
-#' while samples with any scale factor outside this range receive "FLAG".
-#'
-#' @param adat A `soma_adat` object after median normalization
-#' @param verbose Logical. Whether to print progress messages
-#' @return The `soma_adat` object with updated RowCheck values
-#' @noRd
-.recalculateRowCheck <- function(adat, verbose) {
-
-  if (verbose) {
-    cat("Recalculating RowCheck values based on normalization acceptance criteria...\n")
-  }
-
-  # Check if RowCheck column exists
-  if (!"RowCheck" %in% names(adat)) {
-    if (verbose) {
-      cat("No RowCheck column found to recalculate.\n")
-    }
-    return(adat)
-  }
-
-  # Find all normalization scale factor columns (NormScale_*)
-  scale_factor_cols <- grep("^NormScale_", names(adat), value = TRUE)
-
-  if (length(scale_factor_cols) == 0) {
-    if (verbose) {
-      cat("No normalization scale factor columns found. Setting all RowCheck to PASS.\n")
-    }
-    adat$RowCheck <- "PASS"
-    return(adat)
-  }
-
-  # Define acceptance criteria range for row scale factors
-  min_scale <- 0.4
-  max_scale <- 2.5
-
-  # Calculate RowCheck for each sample
-  for (i in seq_len(nrow(adat))) {
-    # Get all scale factor values for this sample
-    scale_values <- as.numeric(adat[i, scale_factor_cols, drop = FALSE])
-    scale_values <- scale_values[!is.na(scale_values)]
-
-    # Check if all scale factors are within acceptance range
-    if (length(scale_values) == 0) {
-      # No scale factors available - default to PASS
-      adat$RowCheck[i] <- "PASS"
-    } else if (all(scale_values >= min_scale & scale_values <= max_scale)) {
-      adat$RowCheck[i] <- "PASS"
-    } else {
-      adat$RowCheck[i] <- "FLAG"
-    }
-  }
-
-  if (verbose) {
-    pass_count <- sum(adat$RowCheck == "PASS", na.rm = TRUE)
-    flag_count <- sum(adat$RowCheck == "FLAG", na.rm = TRUE)
-    cat("RowCheck values updated for", nrow(adat), "samples.\n")
-    cat("  - PASS:", pass_count, "samples\n")
-    cat("  - FLAG:", flag_count, "samples\n")
-    cat("  - Acceptance criteria: scale factors within [", min_scale, ", ", max_scale, "]\n")
-  }
-
-  adat
-}
diff --git a/R/reverseMedianNormalize.R b/R/reverseMedianNormalize.R
new file mode 100644
index 00000000..9c43f024
--- /dev/null
+++ b/R/reverseMedianNormalize.R
@@ -0,0 +1,301 @@
+#' Reverse Median Normalization from Study Samples
+#'
+#' @description Reverses median normalization (including ANML) that was
+#' previously applied to study samples (`SampleType == "Sample"`). This function
+#' is designed to work with standard SomaScan deliverable ADAT files where
+#' study samples have undergone median normalization as the final processing step.
+#'
+#' This function validates that:
+#' \enumerate{
+#'   \item Study samples have a median normalization step applied
+#'   \item The normalization was the last transformation applied to study samples
+#'   \item The correct reversal method is applied based on the normalization type
+#' }
+#'
+#' @section Use Cases:
+#' \itemize{
+#'   \item Converting from normalized ADAT to unnormalized ADAT for custom normalization
+#'   \item Preparing normalized delivery data for use with `medianNormalize()` function
+#'   \item Backing out normalization to apply different normalization strategies
+#' }
+#'
+#' @section Data Requirements:
+#' \itemize{
+#'   \item ADAT file with study samples (`SampleType == "Sample"`) that have been
+#'     median normalized (either standard median normalization or ANML)
+#'   \item Intact header metadata with `ProcessSteps` field indicating the
+#'     normalization history
+#'   \item Median normalization must be the last processing step applied to study samples
+#' }
+#'
+#' @param adat A `soma_adat` object with study samples that have been median normalized
+#' @param verbose Logical. Should progress messages be printed? Default is `TRUE`.
+#' @return A `soma_adat` object with median normalization reversed for study samples.
+#'   QC, Calibrator, and Buffer samples retain their original normalization.
+#'   The `ProcessSteps` header is updated to reflect the reversal operation, 
+#'   and median normalization-specific metadata fields are cleared.
+#' @examples
+#' \dontrun{
+#' # Reverse normalization from a delivered ADAT file
+#' normalized_adat <- read_adat("normalized_study_data.adat")
+#' unnormalized_adat <- reverseMedianNormalize(normalized_adat)
+#' }
+#' @export
+reverseMedianNormalize <- function(adat, verbose = TRUE) {
+
+  # Input validation ----
+  stopifnot("`adat` must be a class `soma_adat` object" = is.soma_adat(adat))
+
+  # Get header metadata
+  header <- attr(adat, "Header.Meta")$HEADER
+
+  if (is.null(header)) {
+    stop("ADAT header metadata is missing", call. = FALSE)
+  }
+
+  # Validate that study samples have been normalized ----
+  process_steps <- header$ProcessSteps %||% ""
+
+  if (process_steps == "") {
+    stop(
+      "ProcessSteps field is empty. Cannot determine normalization history.",
+      call. = FALSE
+    )
+  }
+
+  # Check for evidence of study sample median normalization (MedNormSMP only)
+  has_mednorm_smp <- grepl("MedNormSMP", process_steps, ignore.case = TRUE)
+  has_anml_smp <- grepl("anmlSMP", process_steps, ignore.case = TRUE)
+
+  if (!has_mednorm_smp && !has_anml_smp) {
+    stop(
+      "No evidence of median normalization applied to study samples. ",
+      "ProcessSteps: ", process_steps, ". ",
+      "This function is designed to reverse median normalization from study samples.",
+      call. = FALSE
+    )
+  }
+
+  # Check that normalization hasn't already been reversed
+  has_reversal <- grepl("rev-(?:MedNormSMP|medNormInt|anmlSMP|ANML)", process_steps, ignore.case = TRUE, perl = TRUE)
+
+  if (has_reversal) {
+    stop(
+      "Data appears to have already been denormalized. ",
+      "ProcessSteps: ", process_steps,
+      call. = FALSE
+    )
+  }
+
+  # Determine which normalization type was applied to study samples ----
+  # Only one type should be applied, check which was last
+  if (has_anml_smp) {
+    normalization_type <- "anml"
+  } else if (has_mednorm_smp) {
+    normalization_type <- "median"
+  } else {
+    stop(
+      "Could not determine normalization type from ProcessSteps: ", process_steps,
+      call. = FALSE
+    )
+  }
+
+  # Apply the appropriate denormalization ----
+  if (normalization_type == "anml") {
+    adat <- .reverseANMLSMP(adat, verbose)
+  } else if (normalization_type == "median") {
+    adat <- .reverseMedNormSMP(adat, verbose)
+  }
+
+  # Remove medNormSMP_ReferenceRFU field from analyte metadata after reversal ----
+  col_meta <- attr(adat, "Col.Meta")
+  if (!is.null(col_meta) && "medNormSMP_ReferenceRFU" %in% names(col_meta)) {
+    col_meta$medNormSMP_ReferenceRFU <- NULL
+    attr(adat, "Col.Meta") <- col_meta
+    if (verbose) {
+      cat("Removed medNormSMP_ReferenceRFU field from analyte metadata.\n")
+    }
+  }
+
+  adat
+}
+
+
+#' Reverse Existing Median Normalization for Study Samples
+#' @noRd
+.reverseMedNormSMP <- function(adat, verbose) {
+
+  # Get existing scale factors
+  sf_cols <- grep("^NormScale_", names(adat), value = TRUE)
+
+  if (length(sf_cols) == 0) {
+    if (verbose) {
+      cat("No existing median normalization scale factors found to reverse.\n")
+    }
+    return(adat)
+  }
+
+  if (verbose) {
+    cat("Reversing existing median normalization for study samples...\n")
+  }
+
+  # Get dilution groups
+  apt_data <- getAnalyteInfo(adat)
+  dil_groups <- split(apt_data$AptName, apt_data$Dilution)
+  names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
+  names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
+
+  # Only reverse for study samples, leave QC/Calibrator/Buffer alone
+  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
+
+  if (sum(sample_mask) == 0) {
+    if (verbose) {
+      cat("No study samples found to reverse normalization.\n")
+    }
+    return(adat)
+  }
+
+  # For each dilution group, reverse normalization
+  for (dil_name in names(dil_groups)) {
+    sf_col <- paste0("NormScale_", dil_name)
+
+    if (sf_col %in% names(adat)) {
+      dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
+
+      if (length(dil_apts) > 0) {
+        for (i in which(sample_mask)) {
+          scale_factor <- adat[[sf_col]][i]
+          if (!is.na(scale_factor) && scale_factor != 0) {
+            # Apply inverse of scale factor
+            adat[i, dil_apts] <- adat[i, dil_apts] / scale_factor
+            # Reset scale factor to 1.0
+            adat[[sf_col]][i] <- 1.0
+          }
+        }
+      }
+    }
+  }
+
+  # Update ProcessSteps to indicate reversal
+  header_meta <- attr(adat, "Header.Meta")
+  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
+    current_steps <- header_meta$HEADER$ProcessSteps %||% ""
+
+    # Add reversal step
+    if (grepl("MedNormSMP", current_steps, ignore.case = TRUE)) {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-MedNormSMP", sep = ", ")
+    }
+
+    # Clear median normalization specific header fields
+    header_meta$HEADER$NormalizationAlgorithm <- NULL
+    header_meta$HEADER$MedNormReference <- NULL
+
+    attr(adat, "Header.Meta") <- header_meta
+  }
+
+  if (verbose) {
+    cat("Median normalization reversed for", sum(sample_mask), "study samples.\n")
+  }
+
+  adat
+}
+
+
+#' Reverse Existing ANML Normalization for Study Samples
+#' @noRd
+.reverseANMLSMP <- function(adat, verbose) {
+
+  # Get existing scale factors
+  sf_cols <- grep("^NormScale_", names(adat), value = TRUE)
+
+  if (length(sf_cols) == 0) {
+    if (verbose) {
+      cat("No existing ANML normalization scale factors found to reverse.\n")
+    }
+    return(adat)
+  }
+
+  if (verbose) {
+    cat("Reversing existing ANML normalization for study samples...\n")
+  }
+
+  # Get dilution groups
+  apt_data <- getAnalyteInfo(adat)
+  dil_groups <- split(apt_data$AptName, apt_data$Dilution)
+  names(dil_groups) <- gsub("\\.", "_", names(dil_groups))
+  names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
+
+  # Only reverse for study samples, leave QC/Calibrator/Buffer alone
+  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
+
+  if (sum(sample_mask) == 0) {
+    if (verbose) {
+      cat("No study samples found to reverse ANML normalization.\n")
+    }
+    return(adat)
+  }
+
+  # For each dilution group, reverse ANML normalization using log space
+  for (dil_name in names(dil_groups)) {
+    sf_col <- paste0("NormScale_", dil_name)
+
+    if (sf_col %in% names(adat)) {
+      dil_apts <- intersect(dil_groups[[dil_name]], getAnalytes(adat))
+
+      if (length(dil_apts) > 0) {
+        for (i in which(sample_mask)) {
+          scale_factor <- adat[[sf_col]][i]
+          if (!is.na(scale_factor) && scale_factor != 0) {
+            # ANML uses log space scaling - reverse by subtracting log scale factor
+            log_sf <- log10(scale_factor)
+            log_data <- log10(as.numeric(adat[i, dil_apts]))
+            reversed_log_data <- log_data - log_sf
+            adat[i, dil_apts] <- 10^reversed_log_data
+            # Reset scale factor to 1.0
+            adat[[sf_col]][i] <- 1.0
+          }
+        }
+      }
+    }
+  }
+
+  # Remove ANMLFractionUsed columns if they exist
+  anml_frac_cols <- grep("^ANMLFractionUsed_", names(adat), value = TRUE)
+  if (length(anml_frac_cols) > 0) {
+    for (col in anml_frac_cols) {
+      if (all(is.na(adat[[col]][sample_mask]))) {
+        adat[[col]][sample_mask] <- NA
+      }
+    }
+  }
+
+  # Update ProcessSteps to indicate reversal
+  header_meta <- attr(adat, "Header.Meta")
+  if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
+    current_steps <- header_meta$HEADER$ProcessSteps %||% ""
+
+    # Add reversal step - look for what type of ANML was reversed
+    if (grepl("anmlSMP", current_steps, ignore.case = TRUE)) {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-anmlSMP", sep = ", ")
+    } else if (grepl("ANML", current_steps, ignore.case = TRUE)) {
+      header_meta$HEADER$ProcessSteps <- paste(current_steps, "rev-ANML", sep = ", ")
+    }
+
+    # Clear median normalization specific header fields
+    header_meta$HEADER$NormalizationAlgorithm <- NULL
+    header_meta$HEADER$MedNormReference <- NULL
+
+    attr(adat, "Header.Meta") <- header_meta
+  }
+
+  # Reset RowCheck for reversed samples
+  if ("RowCheck" %in% names(adat)) {
+    adat$RowCheck[sample_mask] <- "PASS"
+  }
+
+  if (verbose) {
+    cat("ANML normalization reversed for", sum(sample_mask), "study samples.\n")
+  }
+
+  adat
+}
diff --git a/man/medianNormalize.Rd b/man/medianNormalize.Rd
index 88e8fcc5..0d33b250 100644
--- a/man/medianNormalize.Rd
+++ b/man/medianNormalize.Rd
@@ -4,13 +4,7 @@
 \alias{medianNormalize}
 \title{Perform Median Normalization}
 \usage{
-medianNormalize(
-  adat,
-  reference = NULL,
-  by = "SampleType",
-  reverse_existing = FALSE,
-  verbose = TRUE
-)
+medianNormalize(adat, reference = NULL, by = "SampleType", verbose = TRUE)
 }
 \arguments{
 \item{adat}{A \code{soma_adat} object created using \code{\link[=read_adat]{read_adat()}}, containing
@@ -40,13 +34,6 @@ performed within each group separately. Default is \code{"SampleType"}. Note
 that only study samples (SampleType == 'Sample') are normalized; QC,
 Calibrator, and Buffer samples are automatically excluded.}
 
-\item{reverse_existing}{Logical. Should existing median or ANML normalization be
-reversed before applying new normalization? When \code{TRUE}, existing median
-normalization scale factors or ANML normalization effects are reversed for
-study samples only (QC, Calibrator, and Buffer samples retain their
-normalization). This allows re-normalization of data that has already been
-median normalized or ANML normalized. Default is \code{FALSE}.}
-
 \item{verbose}{Logical. Should progress messages be printed? Default is \code{TRUE}.}
 }
 \value{
@@ -117,36 +104,35 @@ systematic bias between the biological groups intended for comparison.
 maximum likelihood (ANML) method which leverages a population-based
 reference that iteratively down-selects the set of analytes to include
 for the normalization calculation.
-\item The function requires \code{reverse_existing = TRUE} to be set in order
-to process data where study samples have already undergone ANML or
-standard median normalization.
-\item When reversing existing normalization, only study samples are
-reversed; QC, Calibrator, and Buffer samples retain their normalization
+\item This function requires unnormalized data as input. If study samples
+have already undergone median normalization (ANML or standard), first use
+\code{reverseMedianNormalize()} to remove existing normalization.
 }
 }
 
 \examples{
 \dontrun{
+# Starting with unnormalized ADAT
+unnormalized_adat <- read_adat("unnormalized_study_data.adat")
+
 # Internal reference from study samples (default)
-med_norm_adat <- medianNormalize(adat)
+med_norm_adat <- medianNormalize(unnormalized_adat)
 
 # Reference from another ADAT
 ref_adat <- read_adat("reference_file.adat")
-med_norm_adat <- medianNormalize(adat, reference = ref_adat)
+med_norm_adat <- medianNormalize(unnormalized_adat, reference = ref_adat)
 
 # External reference as a data.frame - requires `SeqId` and `Reference` columns
 ref_data <- read.csv("reference_file.csv")
-med_norm_adat <- medianNormalize(adat, reference = ref_data)
+med_norm_adat <- medianNormalize(unnormalized_adat, reference = ref_data)
 
 # Custom grouping by multiple variables
 # Use when total protein load changes due to analysis conditions
-# (normalize within groups to account for expected biological differences)
-med_norm_adat <- medianNormalize(adat, by = c("Sex", "SampleType"))
+med_norm_adat <- medianNormalize(unnormalized_adat, by = c("Sex", "SampleType"))
 
-# Re-normalize data that has already been median or ANML normalized
-# Use when you want to apply different normalization to previously normalized data
-med_norm_adat <- medianNormalize(adat,
-                                 reverse_existing = TRUE,
-                                 reference = new_reference)
+# If you already have normalized data, first reverse the normalization
+normalized_adat <- read_adat("normalized_study_data.adat")
+unnormalized_adat <- reverseMedianNormalize(normalized_adat)
+custom_norm_adat <- medianNormalize(unnormalized_adat, reference = new_reference)
 }
 }
diff --git a/man/reverseMedianNormalize.Rd b/man/reverseMedianNormalize.Rd
new file mode 100644
index 00000000..8d63423a
--- /dev/null
+++ b/man/reverseMedianNormalize.Rd
@@ -0,0 +1,59 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reverseMedianNormalize.R
+\name{reverseMedianNormalize}
+\alias{reverseMedianNormalize}
+\title{Reverse Median Normalization from Study Samples}
+\usage{
+reverseMedianNormalize(adat, verbose = TRUE)
+}
+\arguments{
+\item{adat}{A \code{soma_adat} object with study samples that have been median normalized}
+
+\item{verbose}{Logical. Should progress messages be printed? Default is \code{TRUE}.}
+}
+\value{
+A \code{soma_adat} object with median normalization reversed for study samples.
+QC, Calibrator, and Buffer samples retain their original normalization.
+The \code{ProcessSteps} header is updated to reflect the reversal operation,
+and median normalization-specific metadata fields are cleared.
+}
+\description{
+Reverses median normalization (including ANML) that was
+previously applied to study samples (\code{SampleType == "Sample"}). This function
+is designed to work with standard SomaScan deliverable ADAT files where
+study samples have undergone median normalization as the final processing step.
+
+This function validates that:
+\enumerate{
+\item Study samples have a median normalization step applied
+\item The normalization was the last transformation applied to study samples
+\item The correct reversal method is applied based on the normalization type
+}
+}
+\section{Use Cases}{
+
+\itemize{
+\item Converting from normalized ADAT to unnormalized ADAT for custom normalization
+\item Preparing normalized delivery data for use with \code{medianNormalize()} function
+\item Backing out normalization to apply different normalization strategies
+}
+}
+
+\section{Data Requirements}{
+
+\itemize{
+\item ADAT file with study samples (\code{SampleType == "Sample"}) that have been
+median normalized (either standard median normalization or ANML)
+\item Intact header metadata with \code{ProcessSteps} field indicating the
+normalization history
+\item Median normalization must be the last processing step applied to study samples
+}
+}
+
+\examples{
+\dontrun{
+# Reverse normalization from a delivered ADAT file
+normalized_adat <- read_adat("normalized_study_data.adat")
+unnormalized_adat <- reverseMedianNormalize(normalized_adat)
+}
+}
diff --git a/tests/testthat/_snaps/medianNormalize.md b/tests/testthat/_snaps/medianNormalize.md
index a0b80f67..fb68f036 100644
--- a/tests/testthat/_snaps/medianNormalize.md
+++ b/tests/testthat/_snaps/medianNormalize.md
@@ -6,8 +6,9 @@
       Data validation passed for median normalization.
       Standard deliverable checks:
         - Hybridization normalization: PASS 
+        - medNormInt (controls): PASS 
         - Plate scale normalization: PASS 
-        - No existing MedNorm/ANML: PASS 
+        - Study samples not already normalized: PASS 
       Three dilution setup detected (standard setup).
     Message
       > Normalization scale factors already exist: "NormScale_20, NormScale_0_005, NormScale_0_5" - they will be replaced with new scale factors
diff --git a/tests/testthat/test-medianNormalize.R b/tests/testthat/test-medianNormalize.R
index 8abef934..13d93ca1 100644
--- a/tests/testthat/test-medianNormalize.R
+++ b/tests/testthat/test-medianNormalize.R
@@ -33,7 +33,7 @@ create_test_data <- function(small = FALSE) {
   header_meta <- attr(test_adat, "Header.Meta")
   if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
     # Remove ANML and median normalization steps to simulate pre-processed state
-    header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, plateScale, Calibration"
+    header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt, plateScale, Calibration"
     attr(test_adat, "Header.Meta") <- header_meta
   }
 
@@ -277,7 +277,7 @@ test_that("`medianNormalize` produces expected verbose output", {
   )
 })
 
-test_that("`medianNormalize` can reverse ANML normalization", {
+test_that("`medianNormalize` errors on already normalized data", {
   # Create ANML-like test data by modifying test_data
   anml_test_data <- test_data
 
@@ -297,22 +297,16 @@ test_that("`medianNormalize` can reverse ANML normalization", {
   anml_test_data$ANMLFractionUsed_0_5 <- c(0.88, 0.91, 0.83)
   anml_test_data$ANMLFractionUsed_0_005 <- c(0.90, 0.87, 0.85)
 
-  # Test ANML reversal
-  expect_no_error(
-    result <- medianNormalize(anml_test_data, reverse_existing = TRUE, verbose = FALSE)
+  # Test that medianNormalize errors on already normalized data
+  expect_error(
+    medianNormalize(anml_test_data, verbose = FALSE),
+    "Study samples appear to already be normalized"
   )
 
-  # Check that result is valid
-  expect_true(is.soma_adat(result))
-
-  # Check that ProcessSteps includes reversal and new normalization
-  result_header <- attr(result, "Header.Meta")$HEADER
-  expect_true(grepl("rev-anmlSMP", result_header$ProcessSteps))
-  expect_true(grepl("MedNormSMP", result_header$ProcessSteps))
-
-  # Test that error occurs without reverse_existing flag
+  # Test that the error message suggests reverseMedianNormalize
   expect_error(
-    medianNormalize(anml_test_data, reverse_existing = FALSE, verbose = FALSE),
+    medianNormalize(anml_test_data, verbose = FALSE),
+    "reverseMedianNormalize"
   )
 })
 
diff --git a/tests/testthat/test-reverseMedianNormalize.R b/tests/testthat/test-reverseMedianNormalize.R
new file mode 100644
index 00000000..fd35c405
--- /dev/null
+++ b/tests/testthat/test-reverseMedianNormalize.R
@@ -0,0 +1,84 @@
+# Setup ----
+data("example_data", package = "SomaDataIO")
+
+# Create small representative subset for faster tests
+test_data_full <- example_data[1:3, ]
+
+# Testing  ----
+test_that("`reverseMedianNormalize` reverses ANML normalization", {
+  # Use example_data which already has anmlSMP ProcessStep
+  anml_data <- test_data_full
+
+  # Verify it has anmlSMP in ProcessSteps (should already be there)
+  header_meta <- attr(anml_data, "Header.Meta")
+  expect_true(grepl("anmlSMP", header_meta$HEADER$ProcessSteps))
+
+  # Add ANML-like normalization scale factors
+  anml_data$NormScale_20 <- c(1.15, 0.95, 1.08)
+  anml_data$NormScale_0_5 <- c(1.22, 0.88, 1.12)
+  anml_data$NormScale_0_005 <- c(0.93, 1.07, 0.98)
+
+  # Reverse ANML normalization
+  expect_no_error(
+    result <- reverseMedianNormalize(anml_data, verbose = FALSE)
+  )
+
+  # Check that result is valid
+  expect_true(is.soma_adat(result))
+
+  # Check that ProcessSteps includes reversal
+  result_header <- attr(result, "Header.Meta")$HEADER
+  expect_true(grepl("rev-anmlSMP", result_header$ProcessSteps))
+
+  # Check that scale factors are reset to 1.0 for study samples
+  sample_mask <- result$SampleType == "Sample"
+  expect_true(all(result$NormScale_20[sample_mask] == 1.0))
+  expect_true(all(result$NormScale_0_5[sample_mask] == 1.0))
+  expect_true(all(result$NormScale_0_005[sample_mask] == 1.0))
+})
+
+test_that("`reverseMedianNormalize` error conditions", {
+  # Create unnormalized data by modifying ProcessSteps
+  unnormalized_data <- test_data_full
+  header_meta <- attr(unnormalized_data, "Header.Meta")
+  header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt, plateScale, Calibration"
+  attr(unnormalized_data, "Header.Meta") <- header_meta
+
+  # Test with unnormalized data (should error)
+  expect_error(
+    reverseMedianNormalize(unnormalized_data, verbose = FALSE),
+    "No evidence of median normalization applied to study samples"
+  )
+
+  # Test with non-soma_adat object
+  expect_error(
+    reverseMedianNormalize(data.frame(a = 1:3, b = 4:6)),
+    "`adat` must be a class `soma_adat` object"
+  )
+})
+
+test_that("`reverseMedianNormalize` + `medianNormalize` workflow", {
+  # Create normalized data with MedNormSMP
+  normalized_data <- test_data_full
+  header_meta <- attr(normalized_data, "Header.Meta")
+  header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt (SampleId), plateScale, Calibration, MedNormSMP"
+  attr(normalized_data, "Header.Meta") <- header_meta
+  normalized_data$NormScale_20 <- c(1.05, 0.98, 1.12)
+  normalized_data$NormScale_0_5 <- c(1.08, 0.92, 1.15)
+  normalized_data$NormScale_0_005 <- c(0.97, 1.03, 0.95)
+
+  # Test complete workflow: reverse then re-normalize
+  expect_no_error(
+    unnormalized <- reverseMedianNormalize(normalized_data, verbose = FALSE)
+  )
+
+  expect_no_error(
+    renormalized <- medianNormalize(unnormalized, verbose = FALSE)
+  )
+
+  # Check final result structure
+  expect_true(is.soma_adat(renormalized))
+  final_header <- attr(renormalized, "Header.Meta")$HEADER
+  expect_true(grepl("rev-MedNormSMP", final_header$ProcessSteps))
+  expect_true(grepl("MedNormSMP", final_header$ProcessSteps))
+})

From 7be8264b6f1d7a6a327ad75b3e4813ec1fa1c7e0 Mon Sep 17 00:00:00 2001
From: Alex Poole <alex.poole@standardbio.com>
Date: Thu, 12 Mar 2026 14:52:32 -0600
Subject: [PATCH 08/10] update medianNormalize function documentation

---
 R/medianNormalize.R | 7 +++----
 man/plot.Map.Rd     | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index 856da4f0..bc2b9aa5 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -17,7 +17,7 @@
 #' distribution aligns to a defined reference, thereby correcting
 #' global intensity shifts without altering relative differences between
 #' measurements within a sample. For assay formats with multiple dilution groups
-#' (e.g., 1:5 or 20%;  1:200 or 0.5%, 1:20,000 or 0.005%), separate scale
+#' (e.g., 1:5 or 20%;  1:200 or 0.5%; 1:20,000 or 0.005%), separate scale
 #' factors are calculated for each dilution because each dilution group is
 #' processed separately during the assay. For each sample, the ratio of
 #' reference RFU / observed RFU is calculated for every SeqId. The median ratio
@@ -41,7 +41,7 @@
 #' \itemize{
 #'   \item Combining data sets from the same overarching experiment or sample
 #'     population and normalize to a common reference that were originally
-#'     processed separately and each normalized "withing study".
+#'     processed separately and each normalized "within study".
 #'   \item Normalize fundamentally different types of samples separately (by
 #'     group). For instance, lysate samples from different cell lines that
 #'     will be analyzed separately should likely be median normalized within
@@ -70,8 +70,7 @@
 #'   \itemize{
 #'     \item `NULL` (default): Calculate an internal reference from study
 #'       samples by taking the median of each SeqId within the sample
-#'       grouping. For multi-plate studies, the median of all plate medians
-#'       is used.
+#'       grouping.
 #'     \item A `soma_adat` object: Extract reference from this ADAT
 #'     \item A data.frame: Use provided reference data directly
 #'   }
diff --git a/man/plot.Map.Rd b/man/plot.Map.Rd
index 1743ba08..67d9ce36 100644
--- a/man/plot.Map.Rd
+++ b/man/plot.Map.Rd
@@ -36,8 +36,8 @@
 \item \code{\link[grDevices:palettes]{grDevices::terrain.colors()}}
 \item \code{\link[grDevices:palettes]{grDevices::topo.colors()}}
 \item \code{\link[RColorBrewer:ColorBrewer]{RColorBrewer::brewer.pal()}}
-\item \code{\link[viridis:reexports]{viridis::viridis()}}
-\item \code{\link[viridis:reexports]{viridis::magma()}}
+\item \code{\link[viridis:viridis]{viridis::viridis()}}
+\item \code{\link[viridis:magma]{viridis::magma()}}
 }}
 
 \item{legend.ticks}{How many ticks to place on the color legend.}

From 0fd12b6e7ec07ad450bf54736292657aa896f379 Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Mon, 16 Mar 2026 10:06:25 -0400
Subject: [PATCH 09/10] Minor medianNormalize msg and documentation edits

---
 R/medianNormalize.R    | 20 +++++++++++++-------
 man/medianNormalize.Rd |  7 +++----
 man/plot.Map.Rd        |  4 ++--
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index bc2b9aa5..d0183616 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -188,13 +188,6 @@ medianNormalize <- function(adat,
   # Check for existing normalization scale factors ----
   existing_norm_sf <- grep("^NormScale_", names(adat), value = TRUE)
 
-  if ( verbose ) {
-    if (length(existing_norm_sf) > 0) {
-      .todo("Normalization scale factors already exist: {.val {paste0(existing_norm_sf, collapse = ', ')}} - they will be replaced with new scale factors")
-    }
-  }
-
-
   # Determine which samples to normalize - only Sample types
   if (!"SampleType" %in% names(adat)) {
     stop("Field 'SampleType' not found in adat columns", call. = FALSE)
@@ -209,6 +202,19 @@ medianNormalize <- function(adat,
   }
   dont_samples <- setdiff(seq_len(nrow(adat)), do_samples)
 
+  # Check existing normalization scale factors and provide appropriate message ----
+  if (verbose && length(existing_norm_sf) > 0) {
+    # Check if all scale factors are 1.0 for samples being normalized
+    sample_sf_data <- adat[do_samples, existing_norm_sf, drop = FALSE]
+    all_ones <- all(sapply(sample_sf_data, function(x) all(abs(x - 1.0) < 1e-10, na.rm = TRUE)))
+    
+    if (all_ones) {
+      .todo("Normalization scale factor columns already exist: {.val {paste0(existing_norm_sf, collapse = ', ')}} - they will be replaced with new scale factors")
+    } else {
+      .todo("Normalization scale factors already exist: {.val {paste0(existing_norm_sf, collapse = ', ')}} - they will be replaced with new scale factors")
+    }
+  }
+
   # Process reference ----
   if (is.null(reference)) {
     # Check if SampleType conflicts with grouping variables
diff --git a/man/medianNormalize.Rd b/man/medianNormalize.Rd
index 0d33b250..1273f902 100644
--- a/man/medianNormalize.Rd
+++ b/man/medianNormalize.Rd
@@ -14,8 +14,7 @@ RFU values that have been hybridization normalized and plate scaled.}
 \itemize{
 \item \code{NULL} (default): Calculate an internal reference from study
 samples by taking the median of each SeqId within the sample
-grouping. For multi-plate studies, the median of all plate medians
-is used.
+grouping.
 \item A \code{soma_adat} object: Extract reference from this ADAT
 \item A data.frame: Use provided reference data directly
 }
@@ -59,7 +58,7 @@ The method scales each sample so that the center of the within-sample analyte
 distribution aligns to a defined reference, thereby correcting
 global intensity shifts without altering relative differences between
 measurements within a sample. For assay formats with multiple dilution groups
-(e.g., 1:5 or 20\%;  1:200 or 0.5\%, 1:20,000 or 0.005\%), separate scale
+(e.g., 1:5 or 20\%;  1:200 or 0.5\%; 1:20,000 or 0.005\%), separate scale
 factors are calculated for each dilution because each dilution group is
 processed separately during the assay. For each sample, the ratio of
 reference RFU / observed RFU is calculated for every SeqId. The median ratio
@@ -84,7 +83,7 @@ steps applied.
 \itemize{
 \item Combining data sets from the same overarching experiment or sample
 population and normalize to a common reference that were originally
-processed separately and each normalized "withing study".
+processed separately and each normalized "within study".
 \item Normalize fundamentally different types of samples separately (by
 group). For instance, lysate samples from different cell lines that
 will be analyzed separately should likely be median normalized within
diff --git a/man/plot.Map.Rd b/man/plot.Map.Rd
index 67d9ce36..1743ba08 100644
--- a/man/plot.Map.Rd
+++ b/man/plot.Map.Rd
@@ -36,8 +36,8 @@
 \item \code{\link[grDevices:palettes]{grDevices::terrain.colors()}}
 \item \code{\link[grDevices:palettes]{grDevices::topo.colors()}}
 \item \code{\link[RColorBrewer:ColorBrewer]{RColorBrewer::brewer.pal()}}
-\item \code{\link[viridis:viridis]{viridis::viridis()}}
-\item \code{\link[viridis:magma]{viridis::magma()}}
+\item \code{\link[viridis:reexports]{viridis::viridis()}}
+\item \code{\link[viridis:reexports]{viridis::magma()}}
 }}
 
 \item{legend.ticks}{How many ticks to place on the color legend.}

From 346fb5681bc36f354535fc783d2217cf0bc77e54 Mon Sep 17 00:00:00 2001
From: scheidec <calebjscheidel@gmail.com>
Date: Mon, 16 Mar 2026 15:38:21 -0400
Subject: [PATCH 10/10] Add more robust checks to medNorm functions

- properly handle data extraction from multiple scale factor columns
- added logic to preserve soma_adat attributes during rbind operations
- improved SampleType matching precision and clearing of
  ANMLFractionUsed column in reverseMedianMormalize()
---
 R/medianNormalize.R                          | 35 +++++++++--
 R/reverseMedianNormalize.R                   | 63 +++++++++++++++-----
 tests/testthat/test-reverseMedianNormalize.R | 37 ++++++++++++
 3 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/R/medianNormalize.R b/R/medianNormalize.R
index d0183616..63d89eaf 100644
--- a/R/medianNormalize.R
+++ b/R/medianNormalize.R
@@ -193,10 +193,10 @@ medianNormalize <- function(adat,
     stop("Field 'SampleType' not found in adat columns", call. = FALSE)
   }
 
-  do_samples <- grep("Sample", adat[["SampleType"]])
+  do_samples <- which(adat$SampleType %in% "Sample")
   if (length(do_samples) == 0L) {
     stop(
-      "No samples selected for normalization with pattern: Sample",
+      "No samples selected for normalization with SampleType == 'Sample'",
       call. = FALSE
     )
   }
@@ -207,7 +207,7 @@ medianNormalize <- function(adat,
     # Check if all scale factors are 1.0 for samples being normalized
     sample_sf_data <- adat[do_samples, existing_norm_sf, drop = FALSE]
     all_ones <- all(sapply(sample_sf_data, function(x) all(abs(x - 1.0) < 1e-10, na.rm = TRUE)))
-    
+
     if (all_ones) {
       .todo("Normalization scale factor columns already exist: {.val {paste0(existing_norm_sf, collapse = ', ')}} - they will be replaced with new scale factors")
     } else {
@@ -281,7 +281,18 @@ medianNormalize <- function(adat,
 
     # Ensure column order matches
     unnorm_adat <- unnorm_adat[, names(norm_adat)]
-    norm_adat <- rbind(norm_adat, unnorm_adat)
+
+    # Combine normalized and unnormalized samples in a way that preserves
+    # soma_adat attributes and column metadata.
+    norm_attrs <- attributes(norm_adat)
+    combined_df <- rbind(as.data.frame(norm_adat), as.data.frame(unnorm_adat))
+    # Restore non-dimension attributes (e.g., Col.Meta, header metadata)
+    # while keeping the combined row.names and names.
+    for (att in setdiff(names(norm_attrs), c("row.names", "names", "class"))) {
+      attr(combined_df, att) <- norm_attrs[[att]]
+    }
+    class(combined_df) <- norm_attrs[["class"]]
+    norm_adat <- combined_df
   }
 
   # Restore original order
@@ -342,7 +353,7 @@ medianNormalize <- function(adat,
   # Calculate RowCheck for each sample
   for (i in seq_len(nrow(adat))) {
     # Get all scale factor values for this sample
-    scale_values <- as.numeric(adat[i, scale_factor_cols, drop = FALSE])
+    scale_values <- unlist(adat[i, scale_factor_cols, drop = FALSE], use.names = FALSE)
     scale_values <- scale_values[!is.na(scale_values)]
 
     # Check if all scale factors are within acceptance range
@@ -515,6 +526,9 @@ medianNormalize <- function(adat,
   # Store original rownames to restore later
   original_rownames <- rownames(adat)
 
+  # Preserve original attributes to restore later
+  original_attrs <- attributes(adat)
+
   # Validate grouping variables
   if (is.character(by) && length(by) > 0) {
     missing_cols <- setdiff(by, names(adat))
@@ -622,7 +636,7 @@ medianNormalize <- function(adat,
 
       # Calculate scale factors for each sample in this group
       for (i in seq_len(nrow(grp_adat))) {
-        sample_values <- as.numeric(grp_adat[i, dil_apts, drop = FALSE])
+        sample_values <- unlist(grp_adat[i, dil_apts, drop = FALSE], use.names = FALSE)
         ratios <- grp_ref_values / sample_values
         med_scale_factor <- median(ratios[is.finite(ratios)], na.rm = TRUE)
 
@@ -652,6 +666,15 @@ medianNormalize <- function(adat,
   # Remove temporary grouping column
   adat$.group <- NULL
 
+  # Reattach original soma_adat attributes lost during rbind
+  if (!is.null(original_attrs)) {
+    # Preserve current column names and row.names; restore all other attributes
+    attrs_to_restore <- setdiff(names(original_attrs), c("names", "row.names"))
+    for (nm in attrs_to_restore) {
+      attr(adat, nm) <- original_attrs[[nm]]
+    }
+  }
+
   # Round to 1 decimal place (standard for SomaScan data)
   apts <- getAnalytes(adat)
   for (apt in apts) {
diff --git a/R/reverseMedianNormalize.R b/R/reverseMedianNormalize.R
index 9c43f024..649145ac 100644
--- a/R/reverseMedianNormalize.R
+++ b/R/reverseMedianNormalize.R
@@ -32,7 +32,7 @@
 #' @param verbose Logical. Should progress messages be printed? Default is `TRUE`.
 #' @return A `soma_adat` object with median normalization reversed for study samples.
 #'   QC, Calibrator, and Buffer samples retain their original normalization.
-#'   The `ProcessSteps` header is updated to reflect the reversal operation, 
+#'   The `ProcessSteps` header is updated to reflect the reversal operation,
 #'   and median normalization-specific metadata fields are cleared.
 #' @examples
 #' \dontrun{
@@ -63,9 +63,13 @@ reverseMedianNormalize <- function(adat, verbose = TRUE) {
     )
   }
 
-  # Check for evidence of study sample median normalization (MedNormSMP only)
-  has_mednorm_smp <- grepl("MedNormSMP", process_steps, ignore.case = TRUE)
-  has_anml_smp <- grepl("anmlSMP", process_steps, ignore.case = TRUE)
+  # Tokenize ProcessSteps so we can reason about ordering of steps ----
+  step_tokens <- unlist(strsplit(process_steps, "\\s*[;,]\\s*"))
+  step_tokens <- step_tokens[nzchar(step_tokens)]
+
+  # Check for evidence of study sample median normalization (MedNormSMP / anmlSMP)
+  has_mednorm_smp <- any(grepl("MedNormSMP", step_tokens, ignore.case = TRUE))
+  has_anml_smp <- any(grepl("anmlSMP", step_tokens, ignore.case = TRUE))
 
   if (!has_mednorm_smp && !has_anml_smp) {
     stop(
@@ -77,7 +81,8 @@ reverseMedianNormalize <- function(adat, verbose = TRUE) {
   }
 
   # Check that normalization hasn't already been reversed
-  has_reversal <- grepl("rev-(?:MedNormSMP|medNormInt|anmlSMP|ANML)", process_steps, ignore.case = TRUE, perl = TRUE)
+  has_reversal <- any(grepl("rev-(?:MedNormSMP|medNormInt|anmlSMP|ANML)", step_tokens,
+                            ignore.case = TRUE, perl = TRUE))
 
   if (has_reversal) {
     stop(
@@ -89,9 +94,30 @@ reverseMedianNormalize <- function(adat, verbose = TRUE) {
 
   # Determine which normalization type was applied to study samples ----
   # Only one type should be applied, check which was last
-  if (has_anml_smp) {
+  # Identify the last normalization-related token and ensure it is the final step
+  norm_idx <- which(
+    grepl("MedNormSMP", step_tokens, ignore.case = TRUE) |
+      grepl("anmlSMP", step_tokens, ignore.case = TRUE)
+  )
+  if (length(norm_idx) == 0L) {
+    stop(
+      "Could not determine normalization type from ProcessSteps: ", process_steps,
+      call. = FALSE
+    )
+  }
+  last_norm_idx <- norm_idx[length(norm_idx)]
+  if (last_norm_idx != length(step_tokens)) {
+    stop(
+      "Median/ANML normalization of study samples is not the final processing step. ",
+      "ProcessSteps: ", process_steps, ". ",
+      "Reversal requires normalization to be the last transformation applied to study samples.",
+      call. = FALSE
+    )
+  }
+  last_norm_token <- step_tokens[last_norm_idx]
+  if (grepl("anmlSMP", last_norm_token, ignore.case = TRUE)) {
     normalization_type <- "anml"
-  } else if (has_mednorm_smp) {
+  } else if (grepl("MedNormSMP", last_norm_token, ignore.case = TRUE)) {
     normalization_type <- "median"
   } else {
     stop(
@@ -146,9 +172,12 @@ reverseMedianNormalize <- function(adat, verbose = TRUE) {
   names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
 
   # Only reverse for study samples, leave QC/Calibrator/Buffer alone
-  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
+  if (is.null(adat$SampleType)) {
+    stop("`SampleType` column is missing; cannot identify study samples to reverse normalization.", call. = FALSE)
+  }
+  sample_mask <- !is.na(adat$SampleType) & (adat$SampleType %in% "Sample")
 
-  if (sum(sample_mask) == 0) {
+  if (!any(sample_mask)) {
     if (verbose) {
       cat("No study samples found to reverse normalization.\n")
     }
@@ -226,9 +255,12 @@ reverseMedianNormalize <- function(adat, verbose = TRUE) {
   names(dil_groups) <- gsub("[.]0$|%|^[.]", "", names(dil_groups))
 
   # Only reverse for study samples, leave QC/Calibrator/Buffer alone
-  sample_mask <- grepl("Sample|sample", adat$SampleType %||% adat$SampleId %||% "", ignore.case = TRUE)
+  if (is.null(adat$SampleType)) {
+    stop("`SampleType` column is missing; cannot identify study samples to reverse normalization.", call. = FALSE)
+  }
+  sample_mask <- !is.na(adat$SampleType) & (adat$SampleType %in% "Sample")
 
-  if (sum(sample_mask) == 0) {
+  if (!any(sample_mask)) {
     if (verbose) {
       cat("No study samples found to reverse ANML normalization.\n")
     }
@@ -248,7 +280,7 @@ reverseMedianNormalize <- function(adat, verbose = TRUE) {
           if (!is.na(scale_factor) && scale_factor != 0) {
             # ANML uses log space scaling - reverse by subtracting log scale factor
             log_sf <- log10(scale_factor)
-            log_data <- log10(as.numeric(adat[i, dil_apts]))
+            log_data <- log10(as.numeric(unlist(adat[i, dil_apts], use.names = FALSE)))
             reversed_log_data <- log_data - log_sf
             adat[i, dil_apts] <- 10^reversed_log_data
             # Reset scale factor to 1.0
@@ -259,13 +291,12 @@ reverseMedianNormalize <- function(adat, verbose = TRUE) {
     }
   }
 
-  # Remove ANMLFractionUsed columns if they exist
+  # Clear ANMLFractionUsed columns for reversed study samples
   anml_frac_cols <- grep("^ANMLFractionUsed_", names(adat), value = TRUE)
   if (length(anml_frac_cols) > 0) {
     for (col in anml_frac_cols) {
-      if (all(is.na(adat[[col]][sample_mask]))) {
-        adat[[col]][sample_mask] <- NA
-      }
+      # Clear ANML-specific metadata for study samples that were reversed
+      adat[[col]][sample_mask] <- NA
     }
   }
 
diff --git a/tests/testthat/test-reverseMedianNormalize.R b/tests/testthat/test-reverseMedianNormalize.R
index fd35c405..9041c6fe 100644
--- a/tests/testthat/test-reverseMedianNormalize.R
+++ b/tests/testthat/test-reverseMedianNormalize.R
@@ -82,3 +82,40 @@ test_that("`reverseMedianNormalize` + `medianNormalize` workflow", {
   expect_true(grepl("rev-MedNormSMP", final_header$ProcessSteps))
   expect_true(grepl("MedNormSMP", final_header$ProcessSteps))
 })
+
+test_that("`reverseMedianNormalize` validates normalization is final step", {
+  # Test error when normalization is not the final step
+  test_data_invalid <- test_data_full
+  header_meta <- attr(test_data_invalid, "Header.Meta")
+  header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt, anmlSMP, plateScale"  # anmlSMP not final
+  attr(test_data_invalid, "Header.Meta") <- header_meta
+
+  expect_error(
+    reverseMedianNormalize(test_data_invalid, verbose = FALSE),
+    "Median/ANML normalization of study samples is not the final processing step"
+  )
+})
+
+test_that("`reverseMedianNormalize` handles ANMLFractionUsed columns properly", {
+  # Test clearing ANML-specific metadata columns
+  test_data_anml <- test_data_full
+  header_meta <- attr(test_data_anml, "Header.Meta")
+  header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt, plateScale, anmlSMP"
+  attr(test_data_anml, "Header.Meta") <- header_meta
+
+  # Add ANML metadata columns
+  test_data_anml$ANMLFractionUsed_20 <- c(0.85, 0.92, 0.78)
+  test_data_anml$ANMLFractionUsed_0_5 <- c(0.91, 0.87, 0.95)
+  test_data_anml$NormScale_20 <- c(1.15, 0.95, 1.08)
+  test_data_anml$NormScale_0_5 <- c(1.22, 0.88, 1.12)
+
+  result <- reverseMedianNormalize(test_data_anml, verbose = FALSE)
+
+  # ANMLFractionUsed columns should be cleared for study samples
+  expect_true(is.na(result$ANMLFractionUsed_20[1]))  # Sample should be NA
+  expect_true(is.na(result$ANMLFractionUsed_0_5[1]))  # Sample should be NA
+
+  # Scale factors should be reset to 1.0 for study samples
+  expect_equal(result$NormScale_20[1], 1.0)
+  expect_equal(result$NormScale_0_5[1], 1.0)
+})