Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions R/colocboost_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ qc_regional_data <- function(region_data,
qc_method = c("rss_qc", "dentist", "slalom"),
impute = TRUE,
impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) {
if (!is.null(qc_method)) qc_method <- match.arg(qc_method)
#### related internal functions
# Add context names to colname of Y if missing
add_context_to_Y <- function(res_Y) {
Expand Down Expand Up @@ -614,6 +615,15 @@ qc_regional_data <- function(region_data,
# - add context to colname of Y
Y <- add_context_to_Y(Y)
n_context <- length(X)
# Validate pip_cutoff_to_skip_ind length: allow scalar or exact length; otherwise error
if (length(pip_cutoff_to_skip_ind) == 1) {
pip_cutoff_vec <- rep(pip_cutoff_to_skip_ind, n_context)
} else if (length(pip_cutoff_to_skip_ind) == n_context) {
pip_cutoff_vec <- pip_cutoff_to_skip_ind
} else {
stop(sprintf("pip_cutoff_to_skip_ind length (%d) must be 1 or equal to number of contexts (%d)",
length(pip_cutoff_to_skip_ind), n_context))
}
residual_X <- residual_Y <- list()
keep_contexts <- c()
for (i_context in 1:n_context) {
Expand All @@ -625,8 +635,8 @@ qc_regional_data <- function(region_data,
# - remove variants with maf < maf_cutoff
# tmp <- filter_resX_maf(resX, maf, maf_cutoff = maf_cutoff)
resX <- filter_X(resX, missing_rate_thresh = NULL, maf_thresh = maf_cutoff, maf = maf)
# Initial PIP check
resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff_to_skip_ind[i_context], context = context)
# Initial PIP check (use recycled vector element)
resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff_vec[i_context], context = context)
if (!is.null(resY)) {
residual_X <- c(residual_X, list(resX))
residual_Y <- c(residual_Y, list(resY))
Expand Down Expand Up @@ -675,6 +685,7 @@ qc_regional_data <- function(region_data,
qc_method = c("rss_qc", "dentist", "slalom"),
impute = TRUE,
impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) {
if (!is.null(qc_method)) qc_method <- match.arg(qc_method)
n_LD <- length(sumstat_data$LD_info)
final_sumstats <- final_LD <- NULL
LD_match <- c()
Expand All @@ -687,7 +698,26 @@ qc_regional_data <- function(region_data,
n <- sumstat$n
var_y <- sumstat$var_y
conditions_sumstat <- names(sumstats)[ii]
pip_cutoff_to_skip_ld <- pip_cutoff_to_skip_sumstat[conditions_sumstat] %>% as.numeric()
# Determine PIP cutoff per sumstat with strict validation:
# - scalar applies to all studies
# - length equals number of studies -> positional mapping
# - named vector that covers ALL study names -> name-based mapping
# Otherwise: throw error (do not silently default)
if (length(pip_cutoff_to_skip_sumstat) == 1) {
pip_cutoff_to_skip_ld <- as.numeric(pip_cutoff_to_skip_sumstat)
} else if (length(pip_cutoff_to_skip_sumstat) == length(sumstats) && is.null(names(pip_cutoff_to_skip_sumstat))) {
pip_cutoff_to_skip_ld <- as.numeric(pip_cutoff_to_skip_sumstat[ii])
} else if (!is.null(names(pip_cutoff_to_skip_sumstat))) {
missing_names <- setdiff(names(sumstats), names(pip_cutoff_to_skip_sumstat))
if (length(missing_names) > 0) {
stop(sprintf("pip_cutoff_to_skip_sumstat must provide values for all studies; missing: %s",
paste(missing_names, collapse = ", ")))
}
pip_cutoff_to_skip_ld <- as.numeric(pip_cutoff_to_skip_sumstat[conditions_sumstat])
} else {
stop(sprintf("pip_cutoff_to_skip_sumstat length (%d) must be 1, equal to number of studies (%d), or be a named vector covering all studies",
length(pip_cutoff_to_skip_sumstat), length(sumstats)))
}

# Preprocess the input data
preprocess_results <- rss_basic_qc(sumstat$sumstats, LD_data, remove_indels = remove_indels)
Expand Down
15 changes: 14 additions & 1 deletion R/file_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,20 @@ load_phenotype_data <- function(phenotype_path, region, extract_region_name = NU
stop("region_name_col is out of bounds for the number of columns in tabix_data.")
}
} else {
return(tabix_data %>% t())
# No extract_region_name filter: still transpose and assign names appropriately
result <- tabix_data %>% t()
if (!is.null(region_name_col) && (region_name_col %% 1 == 0)) {
if (region_name_col <= ncol(tabix_data)) {
# After transpose, the desired name row is at index region_name_col
colnames(result) <- result[region_name_col, ]
} else {
stop("region_name_col is out of bounds for the number of columns in tabix_data.")
}
} else if (is.null(colnames(result))) {
# Fallback: synthesize names if still missing
colnames(result) <- paste0("region_", seq_len(ncol(result)))
}
return(result)
}
}))

Expand Down