Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
443 changes: 310 additions & 133 deletions R/colocboost.R

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion R/colocboost_assemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ colocboost_assemble <- function(cb_obj,
)
# - save model and all coloc and single information for diagnostic
if (output_level != 1) {
tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = NULL)
tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = data_info$variables)
if (output_level == 2) {
cb_output <- c(cb_output, list("ucos_details" = tmp$ucos_detials))
cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details")]
Expand All @@ -79,6 +79,7 @@ colocboost_assemble <- function(cb_obj,
} else if (cb_obj$cb_model_para$model_used == "one_causal"){
# fixme later
check_null_max <- check_null_max * check_null
check_null_max_ucos <- check_null_max_ucos * check_null
}
cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max,
check_null_max_ucos = check_null_max_ucos,
Expand Down Expand Up @@ -209,6 +210,9 @@ colocboost_assemble <- function(cb_obj,
# - colocalization results
cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor
cb_obj$cb_model_para$coverage <- coverage
cb_obj$cb_model_para$min_abs_corr <- min_abs_corr
cb_obj$cb_model_para$median_abs_corr <- median_abs_corr
cb_obj$cb_model_para$n_purity <- n_purity
cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info)
cb_output <- list(
"vcp" = cos_results$vcp,
Expand Down
91 changes: 60 additions & 31 deletions R/colocboost_check_update_jk.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) {

# -- define jk and jk_each
cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation))
abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
# abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each))
abs_cor_vals <- rowSums(abs_cor_vals_each)

jk <- which(abs_cor_vals == max(abs_cor_vals))
jk <- ifelse(length(jk) == 1, jk, sample(jk, 1))
jk_each <- apply(abs_cor_vals_each, 2, function(temp) {
jk_temp <- which(temp == max(temp))
return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1)))
})
# jk <- which(abs_cor_vals == max(abs_cor_vals))
# jk <- ifelse(length(jk) == 1, jk, sample(jk, 1))
# jk_each <- apply(abs_cor_vals_each, 2, which.max)
jk <- which.max(abs_cor_vals)
jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j]))
update_jk[c(1, pos.update + 1)] <- c(jk, jk_each)


Expand Down Expand Up @@ -142,7 +142,7 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) {
update_status[pos.update[true_update]] <- 1
# - re-define jk_star
abs_cor_vals_redefine <- rowSums(abs_cor_vals_each[, true_update, drop = FALSE])
jk_redefine <- which(abs_cor_vals_redefine == max(abs_cor_vals_redefine))
jk_redefine <- which.max(abs_cor_vals_redefine == max(abs_cor_vals_redefine))
jk_redefine <- ifelse(length(jk_redefine) == 1, jk_redefine, sample(jk_redefine, 1))
real_update_jk[pos.update[true_update]] <- jk_redefine
} else {
Expand Down Expand Up @@ -351,11 +351,10 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data,

# -- define jk and jk_each
cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) as.vector(cc$correlation)))
abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
jk_each <- apply(abs_cor_vals_each, 2, function(temp) {
jk_temp <- which(temp == max(temp))
return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1)))
})
# abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each))
# jk_each <- apply(abs_cor_vals_each, 2, which.max)
jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j]))
pp_focal <- which(pos.update == focal_outcome_idx)
jk_focal <- jk_each[pp_focal]

Expand Down Expand Up @@ -503,8 +502,8 @@ get_LD_jk1_jk2 <- function(jk1, jk2,
if (length(XtX) == 1){
LD_temp <- 0
} else {
jk1.remain <- which(remain_jk == jk1)
jk2.remain <- which(remain_jk == jk2)
jk1.remain <- match(jk1, remain_jk)
jk2.remain <- match(jk2, remain_jk)
LD_temp <- XtX[jk1.remain, jk2.remain]
}
}
Expand Down Expand Up @@ -545,29 +544,59 @@ check_pair_jkeach <- function(jk_each,
cb_data, X_dict,
jk_equiv_corr = 0.8,
jk_equiv_loglik = 0.001) {


#' @importFrom stats cor
get_LD_jk_each <- function(jk_each,
X = NULL, XtX = NULL, N = NULL,
remain_jk = NULL) {
if (!is.null(X)) {
LD_temp <- suppressWarnings({
get_cormat(X[, jk_each])
})
LD_temp[which(is.na(LD_temp))] <- 0
# LD_temp <- LD_temp[1, 2]
} else if (!is.null(XtX)) {
if (length(XtX) == 1){
LD_temp <- matrix(0, length(jk_each), length(jk_each))
} else {
jk.remain <- match(jk_each, remain_jk)
LD_temp <- XtX[jk.remain, jk.remain]
LD_temp[which(is.na(LD_temp))] <- 0
}
}
return(LD_temp)
}

detect_func <- function(idx, LD_all, jk_i, jk_j, i, j){
change_log_jk_i <- model_update[[idx]]$change_loglike[jk_i]
change_log_jk_j <- model_update[[idx]]$change_loglike[jk_j]
change_each <- abs(change_log_jk_i - change_log_jk_j)
LD_temp <- LD_all[[idx]][i, j]
return((change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr))
}

data_update <- cb_data$data[pos.update]
LD_all <- lapply(1:length(jk_each), function(idx){
get_LD_jk_each(jk_each,
X = cb_data$data[[X_dict[idx]]]$X,
XtX = cb_data$data[[X_dict[idx]]]$XtX,
N = data_update[[idx]]$N,
remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss)
)
})

# -- check if jk_i ~ jk_j
change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each))
for (i in 1:length(jk_each)) {

jk_i <- jk_each[i]
change_log_jk_i <- model_update[[i]]$change_loglike[jk_i]
for (j in 1:length(jk_each)) {
for (j in i:length(jk_each)) {
if (j != i) {
jk_j <- jk_each[j]

if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) {
change_log_jk_j <- model_update[[i]]$change_loglike[jk_j]
change_each <- abs(change_log_jk_i - change_log_jk_j)
LD_temp <- get_LD_jk1_jk2(jk_i, jk_j,
X = cb_data$data[[X_dict[i]]]$X,
XtX = cb_data$data[[X_dict[i]]]$XtX,
N = data_update[[i]]$N,
remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss)
)
change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)
} else {
change_each_pair[i, j] <- FALSE
change_each_pair[i, j] <- detect_func(idx = i, LD_all, jk_i, jk_j, i, j)
# if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j
if ( !change_each_pair[i, j] ){
change_each_pair[j, i] <- detect_func(idx = j, LD_all, jk_i, jk_j, i, j)
}
} else {
change_each_pair[i, j] <- FALSE
Expand Down
20 changes: 13 additions & 7 deletions R/colocboost_inference.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ get_modularity <- function(Weight, B) {

if (m == 0) return(0)

cate <- B %*% t(B)
# cate <- B %*% t(B)
cate <- tcrossprod(B)

if (m_pos == 0 & m_neg == 0) return(0)

Expand Down Expand Up @@ -190,12 +191,12 @@ get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) {
#' @noRd
w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95,
min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) {
tmp <- apply(weights, 2, w_cs, coverage = coverage)
tmp_purity <- apply(tmp, 2, function(tt) {
pos <- which(tt == 1)

tmp_purity <- apply(weights, 2, function(w) {
pos <- w_cs(w, coverage = coverage)
# deal with missing snp here
if (!is.null(Xcorr)) {
pos <- match(pos, setdiff(1:length(tmp), miss_idx))
pos <- match(pos, setdiff(1:length(w), miss_idx))
}
get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n)
})
Expand Down Expand Up @@ -373,7 +374,12 @@ check_null_post <- function(cb_obj,
}
if (!weaker_effect) {
check_cs_change <- cs_change
check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) cb_obj$cb_model[[j]]$check_null_max)
if (cb_obj$cb_model_para$L == 1){
check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max_ucos
} else {
check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max
}
check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) check_null_max_tmp)
} else {
check_null_tmp <- rep(check_null, cb_obj$cb_model_para$L)
}
Expand Down Expand Up @@ -420,7 +426,7 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) {
corr[which(is.na(corr))] <- 0
value <- abs(get_upper_tri(corr))
} else {
if (sum(Xcorr) == 1){
if (length(Xcorr) == 1){
value <- 0
} else {
Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr
Expand Down
5 changes: 3 additions & 2 deletions R/colocboost_init.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ colocboost_init_model <- function(cb_data,
tmp <- list(
"res" = NULL,
"beta" = rep(0, P),
"weights_path" = c(),
# "weights_path" = c(),
"weights_path" = list(),
"profile_loglike_each" = NULL,
"obj_path" = 999999,
"obj_single" = 999999,
Expand Down Expand Up @@ -619,7 +620,7 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic
Z_extend[pos_target] <- current_z[pos_z]

# Calculate submatrix for each unique entry (not duplicates)
if (sum(current_ld_matrix) == 1){
if (length(current_ld_matrix) == 1){
ld_submatrix <- current_ld_matrix
} else {
ld_submatrix <- NULL
Expand Down
10 changes: 4 additions & 6 deletions R/colocboost_one_causal.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,8 @@ colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data) {
cb_model_para[rm_elements] <- NULL
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1]))
}
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1]))
cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path)
}

cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para)
Expand Down Expand Up @@ -238,7 +237,7 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) {
cb_model_para,
cb_data
)
weights <- rbind(weights, cb_model_tmp[[iy]]$weights_path)
weights <- rbind(weights, unlist(cb_model_tmp[[iy]]$weights_path))
}
###### overlap weights
overlap_pair <- check_pair_overlap(weights, coverage = 0.95)
Expand Down Expand Up @@ -307,11 +306,10 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) {
cb_model_para[rm_elements] <- NULL
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1]))
}
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1]))
cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path)
}

cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para)
class(cb_obj) <- "colocboost"
return(cb_obj)
Expand Down
Loading
Loading