diff --git a/NAMESPACE b/NAMESPACE index ae0fd67..00b6170 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(get_cos_purity) export(get_cos_summary) export(get_hierarchical_clusters) export(get_robust_colocalization) +export(get_robust_ucos) export(get_ucos_summary) importFrom(grDevices,adjustcolor) importFrom(graphics,abline) diff --git a/R/colocboost.R b/R/colocboost.R index 316fb32..3f9f860 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -210,7 +210,12 @@ colocboost <- function(X = NULL, Y = NULL, # individual data dict_YX = dict_YX, dict_sumstatLD = dict_sumstatLD, effect_est = effect_est, effect_se = effect_se, effect_n = effect_n, overlap_variables = overlap_variables, - M = M, min_abs_corr = min_abs_corr + M = M, min_abs_corr = min_abs_corr, + jk_equiv_corr = jk_equiv_corr, + jk_equiv_loglik = jk_equiv_loglik, + func_simplex = func_simplex, + cos_npc_cutoff = cos_npc_cutoff, + npc_outcome_cutoff = npc_outcome_cutoff ) if (is.null(validated_data)) { return(NULL) @@ -235,6 +240,12 @@ colocboost <- function(X = NULL, Y = NULL, # individual data jk_equiv_corr <- validated_data$jk_equiv_corr jk_equiv_loglik <- validated_data$jk_equiv_loglik func_simplex <- validated_data$func_simplex + cos_npc_cutoff <- validated_data$cos_npc_cutoff + npc_outcome_cutoff <- validated_data$npc_outcome_cutoff + if (M == 1){ + cos_npc_cutoff <- 0 + npc_outcome_cutoff <- 0 + } # - initial colocboost object keep_variables <- c(keep_variable_individual, keep_variable_sumstat) @@ -338,6 +349,20 @@ colocboost <- function(X = NULL, Y = NULL, # individual data weight_fudge_factor = weight_fudge_factor, coverage = coverage ) + # ---- post filtering of the colocboost results (get robust trait-specific events) + if ("ucos_details" %in% names(cb_output)) { + if (is.null(pvalue_cutoff)){ + pvalue_cutoff_ucos <- NULL + } else { + # only keep the suggestive significant results + pvalue_cutoff_ucos <- ifelse(pvalue_cutoff > 1e-5, 1e-5, pvalue_cutoff) + } + cb_output <- get_robust_ucos( + cb_output, + npc_outcome_cutoff = npc_outcome_cutoff, + pvalue_cutoff = pvalue_cutoff_ucos + ) + } return(cb_output) } @@ -387,7 +412,11 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, dict_YX = NULL, dict_sumstatLD = NULL, effect_est = NULL, effect_se = NULL, effect_n = NULL, overlap_variables = FALSE, - M = 500, min_abs_corr = 0.5) { + M = 500, min_abs_corr = 0.5, + jk_equiv_corr = 0.8, jk_equiv_loglik = 1, + func_simplex = "LD_z2z", + cos_npc_cutoff = 0.2, + npc_outcome_cutoff = 0.2) { # - check individual level data if (!is.null(X) & !is.null(Y)) { @@ -624,9 +653,11 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, # --- check input of LD M_updated <- M min_abs_corr_updated <- min_abs_corr - jk_equiv_corr_updated <- 0.8 - jk_equiv_loglik_updated <- 1 - func_simplex_updated <- "LD_z2z" + jk_equiv_corr_updated <- jk_equiv_corr + jk_equiv_loglik_updated <- jk_equiv_loglik + func_simplex_updated <- func_simplex + cos_npc_cutoff_updated <- cos_npc_cutoff + npc_outcome_cutoff_updated <- npc_outcome_cutoff if (is.null(LD)) { # if no LD input, set diagonal matrix to LD @@ -644,6 +675,8 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, jk_equiv_corr_updated <- 0 jk_equiv_loglik_updated <- 0.1 func_simplex_updated <- "only_z2z" + cos_npc_cutoff_updated <- 0 + npc_outcome_cutoff_updated <- 0 } else { @@ -804,9 +837,11 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, Z <- N_sumstat <- Var_y <- SeBhat <- sumstatLD_dict <- keep_variable_sumstat <- NULL M_updated <- M min_abs_corr_updated <- min_abs_corr - jk_equiv_corr_updated <- 0.8 - jk_equiv_loglik_updated <- 1 - func_simplex_updated <- "LD_z2z" + jk_equiv_corr_updated <- jk_equiv_corr + jk_equiv_loglik_updated <- jk_equiv_loglik + func_simplex_updated <- func_simplex + cos_npc_cutoff_updated = cos_npc_cutoff + npc_outcome_cutoff_updated = npc_outcome_cutoff } return(list( @@ -826,7 +861,9 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, min_abs_corr = min_abs_corr_updated, jk_equiv_corr = jk_equiv_corr_updated, jk_equiv_loglik = jk_equiv_loglik_updated, - func_simplex = func_simplex_updated + func_simplex = func_simplex_updated, + cos_npc_cutoff = cos_npc_cutoff_updated, + npc_outcome_cutoff = npc_outcome_cutoff_updated )) } diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index 882fc7a..ff9f312 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -269,7 +269,7 @@ check_null_post <- function(cb_obj, ld_feature <- sqrt(abs(ld_jk)) # - calculate delta delta <- boost_KL_delta( - z = z, ld_feature = ld_feature, adj_dep = adj_dep, + z = z, ld_feature = ld_feature, adj_dep = adj_dep, func_simplex = func_simplex, lambda = lambda ) scaling_factor <- if (!is.null(N)) (N - 1) else 1 @@ -575,7 +575,6 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { } avWeight <- coloc_out$avWeight - cs_change <- coloc_out$cs_change check_null_max <- sapply(cb_obj$cb_model, function(cb) cb$check_null_max) outcome_names <- data_info$outcome_info$outcome_names n_cos <- length(avWeight) @@ -603,3 +602,99 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { return(list(normalization_evidence = normalization_evidence, npc = npc)) } + + +#' Function to get the evidence of trait-specific ucos +#' @keywords cb_post_inference +#' @noRd +#' @importFrom stats var +#' @importFrom utils tail +get_ucos_evidence <- function(cb_obj, ucoloc_info) { + + get_ucos_profile <- function(cs_beta, outcome_idx, X = NULL, Y = NULL, N = NULL, + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1) { + if (!is.null(X)) { + cos_profile <- mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1) + yty <- var(Y) + } else if (!is.null(XtY)) { + scaling_factor <- if (!is.null(N)) (N - 1) else 1 + beta_scaling <- if (!is.null(N)) 1 else 100 + cs_beta <- cs_beta / beta_scaling + yty <- YtY / scaling_factor + xtx <- XtX + if (length(miss_idx) != 0) { + xty <- XtY[-miss_idx] / scaling_factor + cs_beta <- cs_beta[-miss_idx] + } else { + xty <- XtY / scaling_factor + } + if (length(xtx) == 1){ + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep + } else { + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep + } + } + delta <- yty - cos_profile + if (delta <= 0) { + warning(paste( + "Warning message: potential sumstat & LD mismatch may happens for outcome", outcome_idx, + ". Using logLR = uCoS(profile) - max(profile). Please check our website https://statfungen.github.io/colocboost/articles/." + )) + } + cos_profile + } + + get_outcome_profile_change <- function(outcome_idx, ucos, cb_obj) { + extract_last <- function(lst) { + tail(lst, n = 1) + } + cb_data <- cb_obj$cb_data + cs_beta <- rep(0, cb_obj$cb_model_para$P) + cs_beta[ucos] <- cb_obj$cb_model[[outcome_idx]]$beta[ucos] + X_dict <- cb_data$dict[outcome_idx] + cos_profile <- get_ucos_profile(cs_beta, outcome_idx, + X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[outcome_idx]]$Y, + XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[outcome_idx]]$XtY, + YtY = cb_data$data[[outcome_idx]]$YtY, N = cb_data$data[[outcome_idx]]$N, + miss_idx = cb_data$data[[outcome_idx]]$variable_miss, + adj_dep = cb_data$data[[outcome_idx]]$dependency + ) + max_profile <- max(cb_obj$cb_model[[outcome_idx]]$profile_loglike_each) + ifelse(max_profile < cos_profile, 0, max_profile - cos_profile) + } + + # - Calculate best configuration likelihood explained by minimal configuration + get_normalization_evidence <- function(profile_change, null_max, outcomes, outcome_names) { + # Define the exponential likelihood ratio normalization (ELRN) + logLR_normalization <- function(ratio) { + 1 - exp(-2 * ratio) + } + + ratio <- profile_change / null_max + prob <- logLR_normalization(ratio) + df <- data.frame(outcome = outcome_names, outcomes_index = outcomes, relative_logLR = ratio, npc_outcome = prob) + return(df) + } + + check_null_max_ucos <- sapply(cb_obj$cb_model, function(cb) cb$check_null_max_ucos) + n_ucos <- length(ucoloc_info$ucos) + normalization_evidence <- list() + for (i in 1:n_ucos) { + outcome_idx <- ucoloc_info$outcome[[i]] + outcome_name <- ucoloc_info$outcome_name[[i]] + # most likely cos + ucos <- ucoloc_info$ucos[[i]] + profile_change_outcome <- get_outcome_profile_change(outcome_idx, ucos, cb_obj) + normalization_evidence[[i]] <- get_normalization_evidence( + profile_change = profile_change_outcome, + null_max = check_null_max_ucos[outcome_idx], + outcome_idx, outcome_name + ) + } + normalization_evidence <- do.call(rbind, normalization_evidence) + rownames(normalization_evidence) <- names(ucoloc_info$ucos) + return(normalization_evidence) +} + + + diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 87617e8..6bbab6f 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -134,7 +134,7 @@ get_colocboost_summary <- function(cb_output, #' \item{cos_details}{A object with all information for colocalization results.} #' \item{data_info}{A object with detailed information from input data} #' \item{model_info}{A object with detailed information for colocboost model} -#' \item{ucos_from_cos}{A object with information for trait-specific effects if exists after removing weaker signals.} +#' \item{ucos_details}{A object with information for trait-specific effects if exists after removing weaker signals.} #' #' @examples #' # colocboost example @@ -176,7 +176,7 @@ get_robust_colocalization <- function(cb_output, } if (is.null(cb_output$cos_details)) { - warning("No colocalization results in this region!") + message("No colocalization results in this region!") return(cb_output) } @@ -289,7 +289,12 @@ get_robust_colocalization <- function(cb_output, } } } - names(coloc_outcome) <- names(coloc_outcome_index) <- names(cos_min_npc_outcome) <- names(cos_npc) <- colocset_names + names(coloc_outcome) <- names(coloc_outcome_index) <- names(cos_min_npc_outcome) <- + names(cos_npc) <- names(cos_details$cos_weights) <- colocset_names + cos_details$cos_purity <- lapply(cos_details$cos_purity, function(pp){ + colnames(pp) <- rownames(pp) <- colocset_names + pp + }) cos_details$cos_outcomes <- list("outcome_index" = coloc_outcome_index, "outcome_name" = coloc_outcome) cos_details$cos_min_npc_outcome <- cos_min_npc_outcome cos_details$cos_npc <- cos_npc @@ -355,11 +360,6 @@ get_robust_colocalization <- function(cb_output, cb_output <- remove_cos(cb_output, remove_idx = remove) cos_details <- cb_output$cos_details - # remove CoS does not pass cos_npc_cutoff - remove <- which(cos_details$cos_npc < cos_npc_cutoff) - cb_output <- remove_cos(cb_output, remove_idx = remove) - cos_details <- cb_output$cos_details - # remove CoS only with one trait n_outcome <- sapply(cos_details$cos_outcomes$outcome_index, length) single <- which(n_outcome == 1) @@ -369,27 +369,59 @@ get_robust_colocalization <- function(cb_output, cb_output <- c(cb_output, list(vcp = NULL, cos_details = NULL)) } else if (length(single) != 0 & length(single) != length(n_outcome)) { # - partial remaining the single outcome + ucos_outcomes_npc <- data.frame( + outcome = unlist(cos_details$cos_outcomes$outcome_name[single]), + outcomes_index = unlist(cos_details$cos_outcomes$outcome_index[single]), + relative_logLR = sapply(single, function(ss){ + cos_details$cos_outcomes_npc[[ss]]$relative_logLR[1] + }), + npc_outcome = sapply(single, function(ss){ + cos_details$cos_outcomes_npc[[ss]]$npc_outcome[1] + }) + ) + rownames(ucos_outcomes_npc) <- names(cos_details$cos$cos_index[single]) + ww <- cos_details$cos_weights[single] + names(ww) <- names(cos_details$cos$cos_index[single]) ucos_from_cos <- list( "ucos" = list( "ucos_index" = cos_details$cos$cos_index[single], "ucos_variables" = cos_details$cos$cos_variables[single] ), "ucos_outcomes" = list( - "outcome_idx" = cos_details$cos_outcomes$outcome_index[single], + "outcome_index" = cos_details$cos_outcomes$outcome_index[single], "outcome_name" = cos_details$cos_outcomes$outcome_name[single] ), - "ucos_weight" = cos_details$cos_weights[single], + "ucos_weight" = ww, + "ucos_top_variables" = cos_details$cos_top_variables[single, , drop = FALSE], "ucos_purity" = list( "min_abs_cor" = as.matrix(cos_details$cos_purity$min_abs_cor)[single, single, drop = FALSE], "median_abs_cor" = as.matrix(cos_details$cos_purity$median_abs_cor)[single, single, drop = FALSE], "max_abs_cor" = as.matrix(cos_details$cos_purity$max_abs_cor)[single, single, drop = FALSE] ), - "ucos_top_variables" = cos_details$cos_top_variables[single, , drop = FALSE] + "cos_ucos_purity" = list( + "min_abs_cor" = as.matrix(cos_details$cos_purity$min_abs_cor)[-single, single, drop = FALSE], + "median_abs_cor" = as.matrix(cos_details$cos_purity$median_abs_cor)[-single, single, drop = FALSE], + "max_abs_cor" = as.matrix(cos_details$cos_purity$max_abs_cor)[-single, single, drop = FALSE] + ), + "ucos_outcomes_npc" = ucos_outcomes_npc ) - cb_output$ucos_from_cos <- ucos_from_cos cb_output <- remove_cos(cb_output, remove_idx = single) + + # merge ucos_from_cos to ucos_details if appliable + message("There are ", length(single), " uCoS generated after filtering the robust colocalization.") + if (!("ucos_details" %in% names(cb_output))) { + cb_output$ucos_details <- ucos_from_cos + } else { + cb_output$ucos_details <- merge_ucos_details(cb_output$ucos_details, ucos_from_cos) + } } + + # remove CoS does not pass cos_npc_cutoff + remove <- which(cb_output$cos_details$cos_npc < cos_npc_cutoff) + cb_output <- remove_cos(cb_output, remove_idx = remove) + cos_details <- cb_output$cos_details + # - refine and output class(cb_output) <- "colocboost" cb_output$cos_summary <- get_cos_summary(cb_output) @@ -397,6 +429,183 @@ get_robust_colocalization <- function(cb_output, return(cb_output) } + +#' @rdname get_robust_ucos +#' +#' @title Recalibrate and summarize robust uncolocalized events. +#' +#' @description `get_robust_ucos` get the uncolocalized events by discarding the weaker signals if "ucos_details" with "output_level = 2" exist. +#' +#' @param cb_output Output object from `colocboost` analysis +#' @param npc_outcome_cutoff Minimum threshold of normalized probability of colocalized traits in each CoS. +#' @param pvalue_cutoff Maximum threshold of marginal p-values of colocalized variants on colocalized traits in each CoS. +#' +#' @return A \code{"colocboost"} object with some or all of the following elements: +#' +#' \item{cos_summary}{A summary table for colocalization events.} +#' \item{vcp}{The variable colocalized probability for each variable.} +#' \item{cos_details}{A object with all information for colocalization results.} +#' \item{data_info}{A object with detailed information from input data} +#' \item{model_info}{A object with detailed information for colocboost model} +#' \item{ucos_details}{A object with information for trait-specific effects if exists after removing weaker signals.} +#' +#' @examples +#' # colocboost example +#' set.seed(1) +#' N <- 1000 +#' P <- 100 +#' # Generate X with LD structure +#' sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +#' X <- MASS::mvrnorm(N, rep(0, P), sigma) +#' colnames(X) <- paste0("SNP", 1:P) +#' L <- 3 +#' true_beta <- matrix(0, P, L) +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 +#' Y <- matrix(0, N, L) +#' for (l in 1:L) { +#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) +#' } +#' res <- colocboost(X = X, Y = Y, output_level = 2) +#' res$ucos_details$ucos$ucos_index +#' filter_res <- get_robust_ucos(res, npc_outcome_cutoff = 0.2, pvalue_cutoff = 1e-6) +#' filter_res$ucos_details$ucos$ucos_index +#' +#' @source See detailed instructions in our tutorial portal: +#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +#' +#' @family colocboost_inference +#' @export +get_robust_ucos <- function(cb_output, + npc_outcome_cutoff = 0.2, + pvalue_cutoff = NULL) { + if (!inherits(cb_output, "colocboost")) { + stop("Input must from colocboost object!") + } + + if (!("ucos_details" %in% names(cb_output))) { + warning( + "Since you want to extract robust outcome-specific (uncolocalized) events from trait-specific (uncolocalized) sets,", + " but there is no output of ucos_details from colocboost.\n", + " Please run colocboost model with output_level=2!" + ) + return(cb_output) + } + + if (is.null(cb_output$ucos_details)) { + message("No uncolocalized results in this region!") + return(cb_output) + } + + if (npc_outcome_cutoff == 0 && is.null(pvalue_cutoff)) { + message("All possible uncolocalized events are reported regardless of their relative evidence (npc_outcome_cutoff = 0).") + return(cb_output) + } else { + if (is.null(pvalue_cutoff)) { + message(paste0( + "Extracting outcome-specific (uncolocalized) results with npc_outcome_cutoff = ", npc_outcome_cutoff, ".\n", + "Keep only uCoS with npc_outcome_cutoff >= ", npc_outcome_cutoff, ". " + )) + } else { + if (pvalue_cutoff > 1 | pvalue_cutoff < 0) { + warning("Please check the pvalue cutoff in [0,1].") + return(cb_output) + } + if (npc_outcome_cutoff == 0) { + message(paste0( + "Extracting outcome-specific (uncolocalized) results with pvalue_cutoff = ", pvalue_cutoff, ".\n", + "Keep only uCoS with pvalue of variants for the outcome < ", pvalue_cutoff, "." + )) + } else { + message(paste0( + "Extracting outcome-specific (uncolocalized) results with pvalue_cutoff = ", pvalue_cutoff, ", and npc_outcome_cutoff = ", npc_outcome_cutoff, ".\n", + "For each uCoS, keep the outcome-specific (uncolocalized) events that pvalue of variants for the outcome < ", + pvalue_cutoff, " and npc_outcome >", npc_outcome_cutoff, "." + )) + } + } + } + + remove_ucos <- function(cb_output, remove_idx = NULL) { + if (length(remove_idx) == 0) { + return(cb_output) + } + ncos <- length(cb_output$ucos_details$ucos$ucos_index) + if (ncos == 0 | length(remove_idx) == ncos) { + cb_output$ucos_details <- NULL + cb_output <- c(cb_output, list(ucos_details = NULL)) + return(cb_output) + } + ucos_details <- cb_output$ucos_details + ucos_details$ucos_top_variables <- ucos_details$ucos_top_variables[-remove_idx, , drop = FALSE] + ucos_details$ucos$ucos_index <- ucos_details$ucos$ucos_index[-remove_idx] + ucos_details$ucos$ucos_variables <- ucos_details$ucos$ucos_variables[-remove_idx] + ucos_details$ucos_outcomes$outcome_index <- ucos_details$ucos_outcomes$outcome_index[-remove_idx] + ucos_details$ucos_outcomes$outcome_name <- ucos_details$ucos_outcomes$outcome_name[-remove_idx] + ucos_details$ucos_weight <- ucos_details$ucos_weight[-remove_idx] + ucos_details$ucos_outcomes_npc <- ucos_details$ucos_outcomes_npc[-remove_idx, , drop = FALSE] + ucos_details$ucos_purity$min_abs_cor <- as.matrix(ucos_details$ucos_purity$min_abs_cor)[-remove_idx, -remove_idx, drop = FALSE] + ucos_details$ucos_purity$median_abs_cor <- as.matrix(ucos_details$ucos_purity$median_abs_cor)[-remove_idx, -remove_idx, drop = FALSE] + ucos_details$ucos_purity$max_abs_cor <- as.matrix(ucos_details$ucos_purity$max_abs_cor)[-remove_idx, -remove_idx, drop = FALSE] + if (!is.null(ucos_details$cos_ucos_purity)){ + ucos_details$cos_ucos_purity$min_abs_cor <- as.matrix(ucos_details$cos_ucos_purity$min_abs_cor)[, -remove_idx, drop = FALSE] + ucos_details$cos_ucos_purity$median_abs_cor <- as.matrix(ucos_details$cos_ucos_purity$median_abs_cor)[, -remove_idx, drop = FALSE] + ucos_details$cos_ucos_purity$max_abs_cor <- as.matrix(ucos_details$cos_ucos_purity$max_abs_cor)[, -remove_idx, drop = FALSE] + } + cb_output$ucos_details <- ucos_details + return(cb_output) + } + + ucos_details <- cb_output$ucos_details + ucoloc_outcome_index <- ucoloc_outcome <- list() + ucolocset_names <- ucos_min_npc_outcome <- c() + for (i in 1:length(ucos_details$ucos$ucos_index)) { + npc_outcome <- ucos_details$ucos_outcomes_npc$npc_outcome[i] + pos_pass <- which(npc_outcome >= npc_outcome_cutoff) + if (!is.null(pvalue_cutoff)) { + ucos_tmp <- ucos_details$ucos$ucos_index[[i]] + ucos_trait <- ucos_details$ucos_outcomes$outcome_index[[i]] + minPV <- sapply(ucos_trait, function(tmp) { + z <- cb_output$data_info$z[[tmp]][ucos_tmp] + pv <- pchisq(z^2, 1, lower.tail = FALSE) + min(pv) + }) + pos_pass_pvalue <- which(minPV <= pvalue_cutoff) + if (length(pos_pass_pvalue) == 0) { + pos_pass <- NULL + } else { + pos_pass <- 1 + } + } + if (length(pos_pass) == 0) { + ucoloc_outcome_index[[i]] <- 0 + ucoloc_outcome[[i]] <- 0 + ucos_min_npc_outcome[i] <- 0 + ucolocset_names[i] <- paste0("remove", i) + } else { + ucos_min_npc_outcome[i] <- npc_outcome + ucoloc_outcome_index[[i]] <- ucos_details$ucos_outcomes$outcome_index[[i]] + ucoloc_outcome[[i]] <- ucos_details$ucos_outcomes_npc$outcome[i] + ucolocset_names[i] <- paste0("ucos", i, ":", paste0("y", ucoloc_outcome_index[[i]])) + } + } + names(ucoloc_outcome) <- names(ucoloc_outcome_index) <- names(ucos_min_npc_outcome) <- ucolocset_names + ucos_details$ucos_outcomes <- list("outcome_index" = ucoloc_outcome_index, "outcome_name" = ucoloc_outcome) + ucos_details$ucos_outcomes_npc$npc_outcome <- ucos_min_npc_outcome + + # remove CoS does not include outcomes pass npc_outcome_cutoff + remove <- grep("remove", ucolocset_names) + cb_output <- remove_ucos(cb_output, remove_idx = remove) + # - refine and output + class(cb_output) <- "colocboost" + + return(cb_output) +} + + + #' @rdname get_ambiguous_colocalization #' #' @title Get ambiguous colocalization events from trait-specific (uncolocalized) effects. @@ -1063,3 +1272,71 @@ get_cos_purity <- function(cos, X = NULL, Xcorr = NULL, n_purity = 100) { return(cos_purity) } + +#' Function to organize and merge ucos +#' @keywords cb_get_functions +#' @noRd +merge_ucos_details <- function(ucos_details, ucos_from_cos) { + + get_cos_ucos_purity <- function(from_ucos, from_cos){ + cos <- intersect(rownames(from_ucos), rownames(from_ucos)) + tmp_from_ucos <- from_ucos[match(cos, rownames(from_ucos)), , drop = FALSE] + tmp_from_cos <- from_cos[match(cos, rownames(from_cos)), , drop = FALSE] + cbind(tmp_from_ucos, tmp_from_cos) + } + + get_ucos_purity <- function(from_ucos, from_cos, cross_from_ucos, cross_from_cos) { + for (id in unique(sub(":.*", "", rownames(from_cos)))) { + old <- grep(paste0("^", id, ":"), rownames(cross_from_ucos), value = TRUE)[1] + new <- grep(paste0("^", id, ":"), rownames(from_cos), value = TRUE)[1] + if (!is.na(old) && !is.na(new)) { + rownames(cross_from_ucos) <- sub(old, new, rownames(cross_from_ucos), fixed = TRUE) + colnames(cross_from_ucos) <- sub(old, new, colnames(cross_from_ucos), fixed = TRUE) + rownames(cross_from_cos) <- sub(old, new, rownames(cross_from_cos), fixed = TRUE) + colnames(cross_from_cos) <- sub(old, new, colnames(cross_from_cos), fixed = TRUE) + } + } + all_ucos <- c(rownames(from_ucos), rownames(from_cos)) + n <- length(all_ucos) + result <- matrix(NA_real_, n, n, dimnames = list(all_ucos, all_ucos)) + mats <- list(from_ucos, from_cos, cross_from_ucos, cross_from_cos) + for (i in 1:n) { + for (j in 1:n) { + for (m in mats) { + if (all_ucos[i] %in% rownames(m) && all_ucos[j] %in% colnames(m)) { + result[i, j] <- result[j, i] <- m[all_ucos[i], all_ucos[j]] + break + } + } + } + } + result + } + + list( + "ucos" = list( + "ucos_index" = c(ucos_details$ucos$ucos_index, ucos_from_cos$ucos$ucos_index), + "ucos_variables" = c(ucos_details$ucos$ucos_variables, ucos_from_cos$ucos$ucos_variables) + ), + "ucos_outcomes" = list( + "outcome_index" = c(ucos_details$ucos_outcomes$outcome_index, ucos_from_cos$ucos_outcomes$outcome_index), + "outcome_name" = c(ucos_details$ucos_outcomes$outcome_name, ucos_from_cos$ucos_outcomes$outcome_name) + ), + "ucos_weight" = c(ucos_details$ucos_weight, ucos_from_cos$ucos_weight), + "ucos_top_variables" = rbind(ucos_details$ucos_top_variables, ucos_from_cos$ucos_top_variables), + "ucos_purity" = list( + "min_abs_cor" = get_ucos_purity(ucos_details$ucos_purity$min_abs_cor, ucos_from_cos$ucos_purity$min_abs_cor, + ucos_details$cos_ucos_purity$min_abs_cor, ucos_from_cos$cos_ucos_purity$min_abs_cor), + "median_abs_cor" = get_ucos_purity(ucos_details$ucos_purity$median_abs_cor, ucos_from_cos$ucos_purity$median_abs_cor, + ucos_details$cos_ucos_purity$median_abs_cor, ucos_from_cos$cos_ucos_purity$median_abs_cor), + "max_abs_cor" = get_ucos_purity(ucos_details$ucos_purity$max_abs_cor, ucos_from_cos$ucos_purity$max_abs_cor, + ucos_details$cos_ucos_purity$max_abs_cor, ucos_from_cos$cos_ucos_purity$max_abs_cor) + ), + "cos_ucos_purity" = list( + "min_abs_cor" = get_cos_ucos_purity(ucos_from_cos$cos_ucos_purity$min_abs_cor, ucos_details$cos_ucos_purity$min_abs_cor), + "median_abs_cor" = get_cos_ucos_purity(ucos_from_cos$cos_ucos_purity$median_abs_cor, ucos_details$cos_ucos_purity$median_abs_cor), + "max_abs_cor" = get_cos_ucos_purity(ucos_from_cos$cos_ucos_purity$max_abs_cor, ucos_details$cos_ucos_purity$max_abs_cor) + ), + "ucos_outcomes_npc" = rbind(ucos_details$ucos_outcomes_npc, ucos_from_cos$ucos_outcomes_npc) + ) +} diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 9bfa3fd..d03126e 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -949,7 +949,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output names(specific_cs_variableidx) <- names(specific_cs_variablenames) <- specific_cs_names specific_css <- list("ucos_index" = specific_cs_variableidx, "ucos_variables" = specific_cs_variablenames) - # - specific set cs_change + # - specific set cs_change cs_change <- out_ucos$change_obj_each rownames(cs_change) <- specific_cs_names colnames(cs_change) <- analysis_outcome @@ -993,6 +993,14 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save rownames(specific_cs_hits) <- specific_cs_names + # - add npc_outcome for ucos (Dec 08, 2025) + ucoloc_info <- list( + ucos = specific_css$ucos_index, + outcome = specific_outcomes$outcome_index, + outcome_name = specific_outcomes$outcome_name + ) + npc_outcome <- get_ucos_evidence(cb_obj, ucoloc_info) + # - purity nucos <- length(specific_css$ucos_index) if (nucos >= 2) { @@ -1082,7 +1090,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output "ucos_top_variables" = specific_cs_hits, "ucos_purity" = specific_cs_purity, "cos_ucos_purity" = cos_ucos_purity, - "ucos_outcomes_delta" = cs_change + "ucos_outcomes_npc" = npc_outcome ) } } else { diff --git a/man/colocboost_validate_input_data.Rd b/man/colocboost_validate_input_data.Rd index b8616e9..d9c45f4 100644 --- a/man/colocboost_validate_input_data.Rd +++ b/man/colocboost_validate_input_data.Rd @@ -16,7 +16,12 @@ colocboost_validate_input_data( effect_n = NULL, overlap_variables = FALSE, M = 500, - min_abs_corr = 0.5 + min_abs_corr = 0.5, + jk_equiv_corr = 0.8, + jk_equiv_loglik = 1, + func_simplex = "LD_z2z", + cos_npc_cutoff = 0.2, + npc_outcome_cutoff = 0.2 ) } \arguments{ diff --git a/man/get_ambiguous_colocalization.Rd b/man/get_ambiguous_colocalization.Rd index 1aeb315..4cff397 100644 --- a/man/get_ambiguous_colocalization.Rd +++ b/man/get_ambiguous_colocalization.Rd @@ -41,6 +41,7 @@ names(res$ambiguous_cos) \seealso{ Other colocboost_inference: \code{\link{get_colocboost_summary}()}, -\code{\link{get_robust_colocalization}()} +\code{\link{get_robust_colocalization}()}, +\code{\link{get_robust_ucos}()} } \concept{colocboost_inference} diff --git a/man/get_colocboost_summary.Rd b/man/get_colocboost_summary.Rd index 2f7c9da..f846081 100644 --- a/man/get_colocboost_summary.Rd +++ b/man/get_colocboost_summary.Rd @@ -102,6 +102,7 @@ get_colocboost_summary(res) \seealso{ Other colocboost_inference: \code{\link{get_ambiguous_colocalization}()}, -\code{\link{get_robust_colocalization}()} +\code{\link{get_robust_colocalization}()}, +\code{\link{get_robust_ucos}()} } \concept{colocboost_inference} diff --git a/man/get_robust_colocalization.Rd b/man/get_robust_colocalization.Rd index d4acbce..4128c64 100644 --- a/man/get_robust_colocalization.Rd +++ b/man/get_robust_colocalization.Rd @@ -38,7 +38,7 @@ A \code{"colocboost"} object with some or all of the following elements: \item{cos_details}{A object with all information for colocalization results.} \item{data_info}{A object with detailed information from input data} \item{model_info}{A object with detailed information for colocboost model} -\item{ucos_from_cos}{A object with information for trait-specific effects if exists after removing weaker signals.} +\item{ucos_details}{A object with information for trait-specific effects if exists after removing weaker signals.} } \description{ \code{get_robust_colocalization} get the colocalization by discarding the weaker colocalization events or colocalized outcomes @@ -71,6 +71,7 @@ filter_res$cos_details$cos$cos_index \seealso{ Other colocboost_inference: \code{\link{get_ambiguous_colocalization}()}, -\code{\link{get_colocboost_summary}()} +\code{\link{get_colocboost_summary}()}, +\code{\link{get_robust_ucos}()} } \concept{colocboost_inference} diff --git a/man/get_robust_ucos.Rd b/man/get_robust_ucos.Rd new file mode 100644 index 0000000..76bdce8 --- /dev/null +++ b/man/get_robust_ucos.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colocboost_output.R +\name{get_robust_ucos} +\alias{get_robust_ucos} +\title{Recalibrate and summarize robust uncolocalized events.} +\source{ +See detailed instructions in our tutorial portal: +\url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +} +\usage{ +get_robust_ucos(cb_output, npc_outcome_cutoff = 0.2, pvalue_cutoff = NULL) +} +\arguments{ +\item{cb_output}{Output object from \code{colocboost} analysis} + +\item{npc_outcome_cutoff}{Minimum threshold of normalized probability of colocalized traits in each CoS.} + +\item{pvalue_cutoff}{Maximum threshold of marginal p-values of colocalized variants on colocalized traits in each CoS.} +} +\value{ +A \code{"colocboost"} object with some or all of the following elements: + +\item{cos_summary}{A summary table for colocalization events.} +\item{vcp}{The variable colocalized probability for each variable.} +\item{cos_details}{A object with all information for colocalization results.} +\item{data_info}{A object with detailed information from input data} +\item{model_info}{A object with detailed information for colocboost model} +\item{ucos_details}{A object with information for trait-specific effects if exists after removing weaker signals.} +} +\description{ +\code{get_robust_ucos} get the uncolocalized events by discarding the weaker signals if "ucos_details" with "output_level = 2" exist. +} +\examples{ +# colocboost example +set.seed(1) +N <- 1000 +P <- 100 +# Generate X with LD structure +sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +X <- MASS::mvrnorm(N, rep(0, P), sigma) +colnames(X) <- paste0("SNP", 1:P) +L <- 3 +true_beta <- matrix(0, P, L) +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 +Y <- matrix(0, N, L) +for (l in 1:L) { + Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) +} +res <- colocboost(X = X, Y = Y, output_level = 2) +res$ucos_details$ucos$ucos_index +filter_res <- get_robust_ucos(res, npc_outcome_cutoff = 0.2, pvalue_cutoff = 1e-6) +filter_res$ucos_details$ucos$ucos_index + +} +\seealso{ +Other colocboost_inference: +\code{\link{get_ambiguous_colocalization}()}, +\code{\link{get_colocboost_summary}()}, +\code{\link{get_robust_colocalization}()} +} +\concept{colocboost_inference} diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index 6123ad3..9c2c9d0 100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -58,6 +58,57 @@ generate_test_result <- function(n = 100, p = 50, L = 2, seed = 42) { return(result) } + +# Utility function to generate test data with uncolocalized effects +generate_ucos_test_data <- function(n = 500, p = 60, L = 3, seed = 42, output_level = 3) { + set.seed(seed) + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects with both colocalized and trait-specific effects + true_beta <- matrix(0, p, L) + + # Colocalized effect: SNP10 affects traits 1 and 2 + true_beta[10, 1] <- 0.7 + true_beta[10, 2] <- 0.6 + + # Trait-specific (uncolocalized) effects + true_beta[30, 1] <- 0.5 # SNP30 only affects trait 1 + true_beta[45, 2] <- 0.6 # SNP45 only affects trait 2 + true_beta[50, 3] <- 0.7 # SNP50 only affects trait 3 + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Prepare input for colocboost + Y_input <- lapply(1:L, function(l) Y[,l]) + X_input <- replicate(L, X, simplify = FALSE) + + # Run colocboost with output_level to get ucos_details + suppressWarnings({ + result <- colocboost( + X = X_input, + Y = Y_input, + output_level = output_level + ) + }) + + return(result) +} + + + # Test for get_strong_colocalization test_that("get_robust_colocalization filters results correctly", { @@ -371,4 +422,752 @@ test_that("get_colocboost_summary works correctly", { # Test error handling expect_error(get_colocboost_summary("not_a_colocboost_object"), "Input must from colocboost output!") +}) + + + +# ============================================================================ +# Tests for get_robust_ucos +# ============================================================================ + +test_that("get_robust_ucos basic functionality works", { + + # Generate test data with ucos + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Basic call with default parameters + result <- get_robust_ucos(cb_res) + + # Should return a colocboost object + expect_s3_class(result, "colocboost") + + # Should have ucos_details (unless all were filtered out) + expect_true("ucos_details" %in% names(result)) +}) + + +test_that("get_robust_ucos filters by npc_outcome_cutoff", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Get original number of ucos + n_ucos_original <- length(cb_res$ucos_details$ucos$ucos_index) + + # Apply lenient filtering (should keep most/all) + result_lenient <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0.1) + + # Apply strict filtering (should remove some/all) + result_strict <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0.8) + + # Check that strict filtering removes at least as many as lenient + n_ucos_lenient <- if(is.null(result_lenient$ucos_details)) 0 else length(result_lenient$ucos_details$ucos$ucos_index) + n_ucos_strict <- if(is.null(result_strict$ucos_details)) 0 else length(result_strict$ucos_details$ucos$ucos_index) + + expect_true(n_ucos_strict <= n_ucos_lenient) + expect_true(n_ucos_lenient <= n_ucos_original) +}) + +test_that("get_robust_ucos filters by pvalue_cutoff", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Apply lenient p-value filtering + expect_message( + result_lenient <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0, pvalue_cutoff = 0.1), + "Extracting outcome-specific.*pvalue_cutoff = 0.1" + ) + + # Apply strict p-value filtering + expect_message( + result_strict <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0, pvalue_cutoff = 1e-6), + "Extracting outcome-specific.*pvalue_cutoff = 1e-06" + ) + + # Both should return colocboost objects + expect_s3_class(result_lenient, "colocboost") + expect_s3_class(result_strict, "colocboost") +}) + +test_that("get_robust_ucos filters by both npc_outcome_cutoff and pvalue_cutoff", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Apply both filters + expect_message( + result <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0.3, pvalue_cutoff = 0.01), + "Extracting outcome-specific.*pvalue_cutoff = 0.01.*npc_outcome_cutoff = 0.3" + ) + + # Should return a colocboost object + expect_s3_class(result, "colocboost") +}) + +test_that("get_robust_ucos handles npc_outcome_cutoff = 0 correctly", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # With npc_outcome_cutoff = 0 and no pvalue_cutoff, should return unchanged + expect_message( + result <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0), + "All possible uncolocalized events are reported" + ) + + # Should be essentially unchanged + expect_equal( + length(result$ucos_details$ucos$ucos_index), + length(cb_res$ucos_details$ucos$ucos_index) + ) +}) + +test_that("get_robust_ucos handles missing ucos_details", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 1) # No ucos_details + + # Should warn and return unchanged + expect_warning( + result <- get_robust_ucos(cb_res), + "Please run colocboost model with output_level=2" + ) + + # Should return original object + expect_equal(result, cb_res) +}) + + +test_that("get_robust_ucos validates input object type", { + + # Should error with non-colocboost object + expect_error( + get_robust_ucos("not_a_colocboost_object"), + "Input must from colocboost object!" + ) + + expect_error( + get_robust_ucos(list(some = "data")), + "Input must from colocboost object!" + ) +}) + +test_that("get_robust_ucos validates pvalue_cutoff range", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # pvalue_cutoff > 1 should warn + expect_warning( + result <- get_robust_ucos(cb_res, pvalue_cutoff = 1.5), + "Please check the pvalue cutoff in \\[0,1\\]" + ) + + # pvalue_cutoff < 0 should warn + expect_warning( + result <- get_robust_ucos(cb_res, pvalue_cutoff = -0.1), + "Please check the pvalue cutoff in \\[0,1\\]" + ) +}) + + +test_that("get_robust_ucos correctly removes all ucos when all fail cutoff", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Apply impossible cutoff + result <- get_robust_ucos(cb_res, npc_outcome_cutoff = 1.0) + + # Should return colocboost object + expect_s3_class(result, "colocboost") + + # ucos_details should be NULL + expect_null(result$ucos_details) +}) + +test_that("get_robust_ucos maintains data structure integrity", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Apply moderate filtering + result <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0.2) + + # Should maintain colocboost structure + expect_s3_class(result, "colocboost") + + # Should have expected fields + expect_true("data_info" %in% names(result)) + expect_true("model_info" %in% names(result)) + + # If ucos_details exists, check its structure + if (!is.null(result$ucos_details)) { + expect_true("ucos" %in% names(result$ucos_details)) + expect_true("ucos_outcomes" %in% names(result$ucos_details)) + expect_true("ucos_outcomes_npc" %in% names(result$ucos_details)) + expect_true("ucos_weight" %in% names(result$ucos_details)) + expect_true("ucos_purity" %in% names(result$ucos_details)) + + # Check that all list elements have consistent lengths + n_ucos <- length(result$ucos_details$ucos$ucos_index) + expect_equal(length(result$ucos_details$ucos$ucos_variables), n_ucos) + expect_equal(length(result$ucos_details$ucos_outcomes$outcome_index), n_ucos) + expect_equal(length(result$ucos_details$ucos_outcomes$outcome_name), n_ucos) + expect_equal(length(result$ucos_details$ucos_weight), n_ucos) + expect_equal(nrow(result$ucos_details$ucos_outcomes_npc), n_ucos) + } +}) + + +test_that("get_robust_ucos preserves names after filtering", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Apply filtering + result <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0.2) + + # If ucos remain, check that names don't contain "remove" + if (!is.null(result$ucos_details)) { + ucos_names <- names(result$ucos_details$ucos_outcomes$outcome_index) + + # Should not have "remove" in names + expect_false(any(grepl("^remove", ucos_names))) + + # Should have proper "ucos" naming pattern + expect_true(all(grepl("^ucos", ucos_names))) + } +}) + + +# Utility function to generate test data with exactly ONE uncolocalized effect +generate_single_ucos_test_data <- function(n = 200, p = 40, seed = 123) { + set.seed(seed) + + # Only 2 traits for simplicity + L <- 2 + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects: + # - Colocalized effect at SNP10 (affects both traits) + # - ONE trait-specific effect at SNP25 (affects only trait 2) + true_beta <- matrix(0, p, L) + true_beta[10, 1] <- 0.8 # SNP10 affects trait 1 + true_beta[10, 2] <- 0.7 # SNP10 also affects trait 2 (colocalized) + true_beta[25, 2] <- 0.6 # SNP25 ONLY affects trait 2 (this is the single ucos) + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 0.8) # Less noise for clearer signals + } + + # Prepare input for colocboost + Y_input <- lapply(1:L, function(l) Y[,l]) + X_input <- replicate(L, X, simplify = FALSE) + + # Run colocboost with output_level = 2 to get ucos_details + suppressWarnings({ + result <- colocboost( + X = X_input, + Y = Y_input, + M = 10, # More iterations for better detection + output_level = 2 + ) + }) + + return(result) +} + + +test_that("get_robust_ucos handles edge case with single ucos", { + + # Generate test data + cb_res <- generate_single_ucos_test_data() + + # Apply lenient filtering (should keep the single ucos) + result_keep <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0.0) + + # Should maintain structure + expect_s3_class(result_keep, "colocboost") + expect_true(!is.null(result_keep$ucos_details)) + expect_equal(length(result_keep$ucos_details$ucos$ucos_index), 1) + + # Apply strict filtering (should remove the single ucos) + result_remove <- get_robust_ucos(cb_res, npc_outcome_cutoff = 1.0) + + # Should have NULL ucos_details + expect_null(result_remove$ucos_details) +}) + + +# Helper to generate proper cb_obj structure for testing get_ucos_evidence +# Following test_model.R generate_test_model pattern +generate_test_cb_obj_with_ucos <- function(n = 100, p = 20, L = 2, seed = 42) { + set.seed(seed) + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects with trait-specific (ucos) effects + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 (trait-specific) + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Convert Y to list + Y_list <- list(Y[,1], Y[,2]) + X_list <- list(X, X) + + # Run colocboost to get diagnostic_details + suppressWarnings({ + result <- colocboost( + X = X_list, + Y = Y_list, + M = 5, + output_level = 3 + )$diagnostic_details + }) + + # Reconstruct cb_data properly following test_model.R pattern + result$cb_model_para$update_y <- c(1:result$cb_model_para$L) + Y_list <- lapply(Y_list, as.matrix) + + result$cb_data <- colocboost_init_data( + X = X_list, + Y = Y_list, + dict_YX = c(1, 2), + Z = NULL, + LD = NULL, + N_sumstat = NULL, + dict_sumstatLD = NULL, + Var_y = NULL, + SeBhat = NULL, + keep_variables = lapply(X_list, colnames) + ) + + class(result) <- "colocboost" + result +} + + +# ============================================================================ +# Tests for get_ucos_evidence (internal function) +# ============================================================================ + +test_that("get_ucos_evidence returns correct structure", { + + # Generate proper cb_obj structure following test_model.R pattern + cb_obj <- generate_test_cb_obj_with_ucos() + + # Also run colocboost to get ucos_details + set.seed(42) + n <- 100 + p <- 20 + L <- 2 + + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.5 + true_beta[5, 2] <- 0.4 + true_beta[10, 2] <- 0.3 + + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + Y_list <- list(Y[,1], Y[,2]) + X_list <- list(X, X) + + suppressWarnings({ + cb_res <- colocboost( + X = X_list, + Y = Y_list, + M = 5, + output_level = 2 + ) + }) + + # Skip if no ucos were detected + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Try to access the function from the package namespace + if (exists("get_ucos_evidence", envir = asNamespace("colocboost"), mode = "function")) { + get_ucos_evidence <- get("get_ucos_evidence", envir = asNamespace("colocboost")) + + # Prepare ucoloc_info from the ucos_details + ucoloc_info <- list( + ucos = cb_res$ucos_details$ucos$ucos_index, + outcome = cb_res$ucos_details$ucos_outcomes$outcome_index, + outcome_name = cb_res$ucos_details$ucos_outcomes$outcome_name + ) + + # Call get_ucos_evidence with the proper cb_obj structure + result <- get_ucos_evidence(cb_obj, ucoloc_info) + + # Check structure + expect_true(is.data.frame(result)) + + # Check expected columns + expected_cols <- c("outcome", "outcomes_index", "relative_logLR", "npc_outcome") + expect_true(all(expected_cols %in% colnames(result))) + + # Check that npc_outcome is in [0, 1] + expect_true(all(result$npc_outcome >= 0 & result$npc_outcome <= 1)) + + # Check that relative_logLR is non-negative + expect_true(all(result$relative_logLR >= 0)) + + } else { + skip("get_ucos_evidence not accessible for testing") + } +}) + +test_that("get_ucos_evidence handles individual-level data", { + + # Generate test data + set.seed(123) + n <- 200 + p <- 30 + L <- 2 + + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + true_beta <- matrix(0, p, L) + true_beta[10, 1] <- 0.7 + true_beta[20, 2] <- 0.6 + + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + Y_list <- lapply(1:L, function(l) Y[,l]) + X_list <- replicate(L, X, simplify = FALSE) + + # Run colocboost to get ucos_details + suppressWarnings({ + cb_res <- colocboost( + X = X_list, + Y = Y_list, + M = 5, + output_level = 3 + ) + }) + + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Build proper cb_obj structure following test_model.R + cb_obj <- cb_res$diagnostic_details + cb_obj$cb_model_para$update_y <- c(1:cb_obj$cb_model_para$L) + Y_list <- lapply(Y_list, as.matrix) + + cb_obj$cb_data <- colocboost_init_data( + X = X_list, + Y = Y_list, + dict_YX = c(1, 2), + Z = NULL, + LD = NULL, + N_sumstat = NULL, + dict_sumstatLD = NULL, + Var_y = NULL, + SeBhat = NULL, + keep_variables = lapply(X_list, colnames) + ) + class(cb_obj) <- "colocboost" + + # Try to access the function from the package namespace + if (exists("get_ucos_evidence", envir = asNamespace("colocboost"), mode = "function")) { + get_ucos_evidence <- get("get_ucos_evidence", envir = asNamespace("colocboost")) + + ucoloc_info <- list( + ucos = cb_res$ucos_details$ucos$ucos_index, + outcome = cb_res$ucos_details$ucos_outcomes$outcome_index, + outcome_name = cb_res$ucos_details$ucos_outcomes$outcome_name + ) + + # Should work with individual-level data + expect_error( + result <- get_ucos_evidence(cb_obj, ucoloc_info), + NA + ) + + expect_true(is.data.frame(result)) + + } else { + skip("get_ucos_evidence not accessible for testing") + } +}) + +test_that("get_ucos_evidence handles summary statistics data", { + + # Generate test data + set.seed(456) + n <- 200 + p <- 30 + L <- 2 + + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + true_beta <- matrix(0, p, L) + true_beta[10, 1] <- 0.7 + true_beta[20, 2] <- 0.6 + + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Calculate summary statistics + beta <- matrix(0, p, L) + se <- matrix(0, p, L) + for (i in 1:L) { + for (j in 1:p) { + fit <- summary(lm(Y[,i] ~ X[,j]))$coef + if (nrow(fit) == 2) { + beta[j,i] <- fit[2,1] + se[j,i] <- fit[2,2] + } + } + } + + sumstat_list <- lapply(1:L, function(i) { + data.frame( + beta = beta[,i], + sebeta = se[,i], + n = n, + variant = colnames(X) + ) + }) + + LD_matrix <- cor(X) + + # Run colocboost + suppressWarnings({ + cb_res <- colocboost( + sumstat = sumstat_list, + LD = LD_matrix, + M = 5, + output_level = 3 + ) + }) + + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Build proper cb_obj structure + cb_obj <- cb_res$diagnostic_details + cb_obj$cb_model_para$update_y <- c(1:cb_obj$cb_model_para$L) + + Z_list <- lapply(sumstat_list, function(s) s$beta / s$sebeta) + + cb_obj$cb_data <- colocboost_init_data( + X = NULL, + Y = NULL, + dict_YX = NULL, + Z = Z_list, + LD = list(LD_matrix), + N_sumstat = lapply(sumstat_list, function(s) s$n[1]), + dict_sumstatLD = c(1, 1), + Var_y = NULL, + SeBhat = NULL, + keep_variables = lapply(sumstat_list, function(s) s$variant) + ) + class(cb_obj) <- "colocboost" + + # Try to access the function + if (exists("get_ucos_evidence", envir = asNamespace("colocboost"), mode = "function")) { + get_ucos_evidence <- get("get_ucos_evidence", envir = asNamespace("colocboost")) + + ucoloc_info <- list( + ucos = cb_res$ucos_details$ucos$ucos_index, + outcome = cb_res$ucos_details$ucos_outcomes$outcome_index, + outcome_name = cb_res$ucos_details$ucos_outcomes$outcome_name + ) + + # Should work with summary statistics + expect_error( + result <- get_ucos_evidence(cb_obj, ucoloc_info), + NA + ) + + expect_true(is.data.frame(result)) + + } else { + skip("get_ucos_evidence not accessible for testing") + } +}) + +# ============================================================================ +# Integration tests +# ============================================================================ + +test_that("get_robust_ucos and get_ucos_evidence work together", { + + # Generate proper cb_obj + cb_obj <- generate_test_cb_obj_with_ucos(n = 150, p = 30, L = 2, seed = 555) + + # Also generate cb_res for filtering + set.seed(555) + n <- 150 + p <- 30 + L <- 2 + + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + true_beta <- matrix(0, p, L) + true_beta[8, 1] <- 0.6 + true_beta[18, 2] <- 0.5 + + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + Y_list <- lapply(1:L, function(l) Y[,l]) + X_list <- replicate(L, X, simplify = FALSE) + + suppressWarnings({ + cb_res <- colocboost( + X = X_list, + Y = Y_list, + M = 5, + output_level = 2 + ) + }) + + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + + # Apply filtering + filtered_res <- get_robust_ucos(cb_res, npc_outcome_cutoff = 0.2) + + # Check that the result is valid + expect_s3_class(filtered_res, "colocboost") + + # If ucos remain, check that we can extract evidence + if (!is.null(filtered_res$ucos_details)) { + + if (exists("get_ucos_evidence", envir = asNamespace("colocboost"), mode = "function")) { + get_ucos_evidence <- get("get_ucos_evidence", envir = asNamespace("colocboost")) + + ucoloc_info <- list( + ucos = filtered_res$ucos_details$ucos$ucos_index, + outcome = filtered_res$ucos_details$ucos_outcomes$outcome_index, + outcome_name = filtered_res$ucos_details$ucos_outcomes$outcome_name + ) + + expect_error( + evidence <- get_ucos_evidence(cb_obj, ucoloc_info), + NA + ) + + # All npc values should meet the cutoff (or be 0 if removed) + expect_true(all(evidence$npc_outcome >= 0.2 | evidence$npc_outcome == 0)) + + } else { + skip("get_ucos_evidence not accessible for testing") + } + } +}) + +test_that("get_robust_ucos with different cutoffs produces expected ordering", { + + # Generate test data + cb_res <- generate_ucos_test_data(output_level = 2) + + # Skip if no ucos + skip_if(is.null(cb_res$ucos_details), "No ucos detected in test data") + skip_if(length(cb_res$ucos_details$ucos$ucos_index) < 2, "Need at least 2 ucos for this test") + + # Apply progressively stricter cutoffs + cutoffs <- c(0.1, 0.3, 0.5, 0.7, 0.9) + n_ucos_remaining <- integer(length(cutoffs)) + + for (i in seq_along(cutoffs)) { + result <- get_robust_ucos(cb_res, npc_outcome_cutoff = cutoffs[i]) + n_ucos_remaining[i] <- if(is.null(result$ucos_details)) 0 else length(result$ucos_details$ucos$ucos_index) + } + + # The number of remaining ucos should be monotonically non-increasing + expect_true(all(diff(n_ucos_remaining) <= 0)) }) \ No newline at end of file