From 47ae7d07aaa1779287cce5d2388fa82eb89f10ee Mon Sep 17 00:00:00 2001 From: Kate Lawrence Date: Thu, 30 Oct 2025 10:38:40 -0700 Subject: [PATCH 1/3] add rss_qc as default qc_method --- R/colocboost_pipeline.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/colocboost_pipeline.R b/R/colocboost_pipeline.R index 2b22d712..d12bd6d9 100644 --- a/R/colocboost_pipeline.R +++ b/R/colocboost_pipeline.R @@ -554,6 +554,7 @@ qc_regional_data <- function(region_data, qc_method = c("rss_qc", "dentist", "slalom"), impute = TRUE, impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) { + if (!is.null(qc_method)) qc_method <- match.arg(qc_method) #### related internal functions # Add context names to colname of Y if missing add_context_to_Y <- function(res_Y) { @@ -675,6 +676,7 @@ qc_regional_data <- function(region_data, qc_method = c("rss_qc", "dentist", "slalom"), impute = TRUE, impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) { + if (!is.null(qc_method)) qc_method <- match.arg(qc_method) n_LD <- length(sumstat_data$LD_info) final_sumstats <- final_LD <- NULL LD_match <- c() From 4d60845831620da3a0b8d8a43be9f54aa4d9880f Mon Sep 17 00:00:00 2001 From: Kate Lawrence Date: Thu, 30 Oct 2025 10:42:40 -0700 Subject: [PATCH 2/3] add default pip_cutoff for multiple individual contexts or sumstats --- R/colocboost_pipeline.R | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/R/colocboost_pipeline.R b/R/colocboost_pipeline.R index d12bd6d9..5b98aab4 100644 --- a/R/colocboost_pipeline.R +++ b/R/colocboost_pipeline.R @@ -615,6 +615,15 @@ qc_regional_data <- function(region_data, # - add context to colname of Y Y <- add_context_to_Y(Y) n_context <- length(X) + # Validate pip_cutoff_to_skip_ind length: allow scalar or exact length; otherwise error + if (length(pip_cutoff_to_skip_ind) == 1) { + pip_cutoff_vec <- rep(pip_cutoff_to_skip_ind, n_context) + } else if (length(pip_cutoff_to_skip_ind) == n_context) { + pip_cutoff_vec <- pip_cutoff_to_skip_ind + } else { + stop(sprintf("pip_cutoff_to_skip_ind length (%d) must be 1 or equal to number of contexts (%d)", + length(pip_cutoff_to_skip_ind), n_context)) + } residual_X <- residual_Y <- list() keep_contexts <- c() for (i_context in 1:n_context) { @@ -626,8 +635,8 @@ qc_regional_data <- function(region_data, # - remove variants with maf < maf_cutoff # tmp <- filter_resX_maf(resX, maf, maf_cutoff = maf_cutoff) resX <- filter_X(resX, missing_rate_thresh = NULL, maf_thresh = maf_cutoff, maf = maf) - # Initial PIP check - resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff_to_skip_ind[i_context], context = context) + # Initial PIP check (use recycled vector element) + resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff_vec[i_context], context = context) if (!is.null(resY)) { residual_X <- c(residual_X, list(resX)) residual_Y <- c(residual_Y, list(resY)) @@ -689,7 +698,26 @@ qc_regional_data <- function(region_data, n <- sumstat$n var_y <- sumstat$var_y conditions_sumstat <- names(sumstats)[ii] - pip_cutoff_to_skip_ld <- pip_cutoff_to_skip_sumstat[conditions_sumstat] %>% as.numeric() + # Determine PIP cutoff per sumstat with strict validation: + # - scalar applies to all studies + # - length equals number of studies -> positional mapping + # - named vector that covers ALL study names -> name-based mapping + # Otherwise: throw error (do not silently default) + if (length(pip_cutoff_to_skip_sumstat) == 1) { + pip_cutoff_to_skip_ld <- as.numeric(pip_cutoff_to_skip_sumstat) + } else if (length(pip_cutoff_to_skip_sumstat) == length(sumstats) && is.null(names(pip_cutoff_to_skip_sumstat))) { + pip_cutoff_to_skip_ld <- as.numeric(pip_cutoff_to_skip_sumstat[ii]) + } else if (!is.null(names(pip_cutoff_to_skip_sumstat))) { + missing_names <- setdiff(names(sumstats), names(pip_cutoff_to_skip_sumstat)) + if (length(missing_names) > 0) { + stop(sprintf("pip_cutoff_to_skip_sumstat must provide values for all studies; missing: %s", + paste(missing_names, collapse = ", "))) + } + pip_cutoff_to_skip_ld <- as.numeric(pip_cutoff_to_skip_sumstat[conditions_sumstat]) + } else { + stop(sprintf("pip_cutoff_to_skip_sumstat length (%d) must be 1, equal to number of studies (%d), or be a named vector covering all studies", + length(pip_cutoff_to_skip_sumstat), length(sumstats))) + } # Preprocess the input data preprocess_results <- rss_basic_qc(sumstat$sumstats, LD_data, remove_indels = remove_indels) From 705eae49f8b92fdeea9d82299481b48bcaa8b19d Mon Sep 17 00:00:00 2001 From: Kate Lawrence Date: Thu, 30 Oct 2025 10:58:10 -0700 Subject: [PATCH 3/3] get names from region_name_col even if extract_region_name isn't provided. Add fallback region nameing (reigon_1, region_2, ..) --- R/file_utils.R | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/R/file_utils.R b/R/file_utils.R index ce3253ff..6e2444a6 100644 --- a/R/file_utils.R +++ b/R/file_utils.R @@ -243,7 +243,20 @@ load_phenotype_data <- function(phenotype_path, region, extract_region_name = NU stop("region_name_col is out of bounds for the number of columns in tabix_data.") } } else { - return(tabix_data %>% t()) + # No extract_region_name filter: still transpose and assign names appropriately + result <- tabix_data %>% t() + if (!is.null(region_name_col) && (region_name_col %% 1 == 0)) { + if (region_name_col <= ncol(tabix_data)) { + # After transpose, the desired name row is at index region_name_col + colnames(result) <- result[region_name_col, ] + } else { + stop("region_name_col is out of bounds for the number of columns in tabix_data.") + } + } else if (is.null(colnames(result))) { + # Fallback: synthesize names if still missing + colnames(result) <- paste0("region_", seq_len(ncol(result))) + } + return(result) } }))