From 07b862e3e20cb7646824c5e7608446329cc8acfa Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Thu, 30 Oct 2025 21:20:12 +0000 Subject: [PATCH 01/10] IBD partitioning & other fixes -IBD partition -Fixed missing g in alphapartdropgroup() -removed centre=FALSE --- R/AlphaPart.R | 170 ++++++++++++++---- R/RcppExports.R | 4 +- R/methods.R | 14 +- src/AlphaPartDrop.cpp | 33 +++- src/RcppExports.cpp | 9 +- tests/testthat/test-plotSummaryAlphaPart.R | 2 +- tests/testthat/test-printAlphaPart.R | 7 +- .../testthat/test-printPlotSummaryAlphaPart.R | 2 +- tests/testthat/test-printSummary-alphapart.R | 2 +- .../testthat/test-savePlotSummaryAlphaPart.R | 2 +- tests/testthat/test-summary-alphapart.R | 4 +- 11 files changed, 195 insertions(+), 54 deletions(-) diff --git a/R/AlphaPart.R b/R/AlphaPart.R index d6a85e1..bc5d2db 100644 --- a/R/AlphaPart.R +++ b/R/AlphaPart.R @@ -74,7 +74,11 @@ #' @param colPath Numeric or character, position or name of a column #' holding path information. #' @param colBV Numeric or character, position(s) or name(s) of -#' column(s) holding genetic Values. +#' column(s) holding genetic values. +#' @param colPaternalBV Numeric or character, position(s) of column(s) +#' holding the paternal genetic values for IBD partitioning. +#' @param colMaternalBV Numeric or character, position(s) of column(s) +#' holding the maternal genetic values for IBD partitioning.. #' @param colBy Numeric or character, position or name of a column #' holding group information (see details). #' @@ -164,6 +168,8 @@ AlphaPart <- function( colMid = 3, colPath = 4, colBV = 5:ncol(x), + colPaternalBV = NULL, + colMaternalBV = NULL, colBy = NULL ) { ## Test if the data is a data.frame @@ -175,12 +181,23 @@ AlphaPart <- function( length(colFid) > 1 | length(colMid) > 1 | length(colPath) > 1 | - length(colBy) > 1) + length(colBy) > 1) | + length(colPaternalBV) > 1 | + length(colMaternalBV) > 1 if (test) { stop( "arguments 'colId', 'colFid', 'colMid', 'colPath', and 'colBy' must be of length 1" ) } + + if (!is.null(colPaternalBV) | !is.null(colMaternalBV)){ + if (is.null(colPaternalBV) | is.null(colMaternalBV)){ + stop("Both 'colPaternalBV' and 'colMaternalBV' must be provided for IBD partitioning.") + } + gameticPartition <- TRUE + } else { + gameticPartition <- FALSE + } if (is.null(colBy)) { groupSummary <- FALSE @@ -237,6 +254,24 @@ AlphaPart <- function( } testN <- NULL # not needed anymore } + + if (gameticPartition){ + if(!is.numeric(colPaternalBV)){ + testN <- length(colPaternalBV) + colPaternalBV <- which(colnames(x) %in% colPaternalBV) + if (length(colPaternalBV) != testN) { + stop("Identification not valid for 'colPaternalBV' column(s) name", call. = FALSE) + } + } + if(!is.numeric(colMaternalBV)){ + testN <- length(colMaternalBV) + colMaternalBV <- which(colnames(x) %in% colMaternalBV) + if (length(colMaternalBV) != testN) { + stop("Identification not valid for 'colMaternalBV' column(s) name", call. = FALSE) + } + } + testN <- NULL # not needed anymore + } ## --- Sort and recode pedigree --- ## Make sure that identifications are numeric if recode=FALSE @@ -253,6 +288,35 @@ AlphaPart <- function( stop("colBV columns must be numeric!") str(x) } + + ## If gametic partitioning, make sure that colPaternalBV and colMaternalBV: + ## - are numeric + ## - the same length as colBV + ## - the columns sum to the colBV columns + if (gameticPartition){ + test <- !sapply(x[, c(colPaternalBV)], is.numeric) + if (any(test)){ + stop("colPaternalBV columns must be numeric!") + str(x) + } + test <- !sapply(x[, c(colMaternalBV)], is.numeric) + if (any(test)){ + stop("colMaternalBV columns must be numeric!") + str(x) + } + test <- length(colPaternalBV) != length(colMaternalBV) + if (any(test)){ + stop("colPaternalBV and colMaternalBV must be of the same length!") + } + test <- length(colPaternalBV) != length(colBV) + if (any(test)){ + stop("colPaternalBV and colMaternalBV must be of the same length as colBV!") + } + test <- any(x[, c(colPaternalBV)] + x[, c(colMaternalBV)] != x[, c(colBV)]) + if (any(test)){ + stop("The sum of colPaternalBV and colMaternalBV must be equal to colBV for each individual!") + } + } ## Sort so that parents precede children if (sort) { @@ -307,7 +371,14 @@ AlphaPart <- function( } } } - y <- cbind(y, as.matrix(x[, colBV])) + + if (!gameticPartition){ + y <- cbind(y, as.matrix(x[, colBV])) + nGP <- 1 # Number of genetic partitions: total + } else { + y <- cbind(y, as.matrix(x[, colBV]), as.matrix(x[, colPaternalBV]), as.matrix(x[, colMaternalBV])) + nGP <- 3 # Number of genetic partitions: total, paternal, maternal + } ## Test if father and mother codes precede children code - ## computational engine needs this @@ -430,6 +501,7 @@ AlphaPart <- function( nI = nI, nP = nP, nT = nT, + nGP = nGP, ped = y, P = as.integer(P), Px = as.integer(cumsum(c(0, rep(nP, nT - 1)))) @@ -448,14 +520,23 @@ AlphaPart <- function( nG = nG, ped = y, P = as.integer(P), - Px = as.integer(cumsum(c(0, rep(nP, nT - 1)))) + Px = as.integer(cumsum(c(0, rep(nP, nT - 1)))), + g = g ) } ## Assign nice column names - colnames(tmp$pa) <- paste(lT, "_pa", sep = "") - colnames(tmp$ms) <- paste(lT, "_ms", sep = "") - colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) + if (!gameticPartition){ + colnames(tmp$pa) <- paste(lT, "_pa", sep = "") + colnames(tmp$ms) <- paste(lT, "_ms", sep = "") + colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) + } else { + lPT <- colnames(x[, c(colBV, colPaternalBV, colMaternalBV), drop=FALSE]) + colnames(tmp$pa) <- paste(lPT, "_pa", sep="") + colnames(tmp$ms) <- paste(lPT, "_ms", sep="") + colnames(tmp$xa) <- c(t(outer(lPT, lP, paste, sep = "_"))) + } + ## --- Massage results --- @@ -466,34 +547,62 @@ AlphaPart <- function( colP <- colnames(tmp$pa) colM <- colnames(tmp$ms) colX <- colnames(tmp$xa) - - for (j in 1:nT) { - ## j <- 1 - Py <- seq(t + 1, t + nP) - ret[[j]] <- cbind(tmp$pa[-1, j], tmp$ms[-1, j], tmp$xa[-1, Py]) - colnames(ret[[j]]) <- c(colP[j], colM[j], colX[Py]) - t <- max(Py) + + if (!gameticPartition) { + for (j in 1:nT) { + ## j <- 1 + Py <- seq(t + 1, t + nP) + ret[[j]] <- cbind(tmp$pa[-1, j], tmp$ms[-1, j], tmp$xa[-1, Py]) + colnames(ret[[j]]) <- c(colP[j], colM[j], colX[Py]) + t <- max(Py) + } + } else { + for (j in 1:nT) { + ## j <- 1 + Py <- c(j-1+seq(t+1, t+nP), j-1+nT*nP+seq(t+1, t+nP), j-1+2*nT*nP+seq(t+1, t+nP)) + ret[[j]] <- cbind(tmp$pa[-1, j], tmp$ms[-1, j], tmp$xa[-1, Py]) + colnames(ret[[j]]) <- c(colP[j], colM[j], colX[Py]) + t <- max(Py) + } } - tmp <- NULL # not needed anymore + + tmp <- NULL # not needed anymore ## Add initial data if (!groupSummary) { - for (i in 1:nT) { - ## Hassle in order to get all columns and to be able to work with - ## numeric or character column "names" - colX <- colX2 <- colnames(x) - names(colX) <- colX - names(colX2) <- colX2 - ## ... put current agv in the last column in original data - colX <- c(colX[!(colX %in% colX[colBV[i]])], colX[colBV[i]]) - ## ... remove other traits - colX <- colX[ - !(colX %in% - colX2[(colX2 %in% colX2[colBV]) & !(colX2 %in% colX2[colBV[i]])]) - ] - ret[[i]] <- cbind(x[, colX], as.data.frame(ret[[i]])) - rownames(ret[[i]]) <- NULL + if (!gameticPartition) { + for (i in 1:nT) { + ## Hassle in order to get all columns and to be able to work with + ## numeric or character column "names" + colX <- colX2 <- colnames(x) + names(colX) <- colX + names(colX2) <- colX2 + ## ... put current agv in the last column in original data + colX <- c(colX[!(colX %in% colX[colBV[i]])], colX[colBV[i]]) + ## ... remove other traits + colX <- colX[ + !(colX %in% + colX2[(colX2 %in% colX2[colBV]) & !(colX2 %in% colX2[colBV[i]])]) + ] + ret[[i]] <- cbind(x[, colX], as.data.frame(ret[[i]])) + rownames(ret[[i]]) <- NULL + } + } else { + for (i in 1:nT) { + ## Hassle in order to get all columns and to be able to work with + ## numeric or character column "names" + colX <- colX2 <- colnames(x) + names(colX) <- colX; names(colX2) <- colX2 + ## ... put current agv in the last three columns in original data + colX <- c(colX[!(colX %in% c(colX[colBV[i]], colX[colPaternalBV[i]], colX[colMaternalBV[i]]))], + colX[colBV[i]], colX[colPaternalBV[i]], colX[colMaternalBV[i]]) + ## ... remove other traits + colX <- colX[!(colX %in% colX2[(colX2 %in% c(colX2[colBV], colX2[colPaternalBV], colX2[colMaternalBV])) & ! + (colX2 %in% c(colX2[colBV[i]], colX2[colPaternalBV[i]], colX2[colMaternalBV[i]]))])] + ret[[i]] <- cbind(x[, colX], as.data.frame(ret[[i]])) + rownames(ret[[i]]) <- NULL + } } } @@ -507,6 +616,7 @@ AlphaPart <- function( lP = lP, nT = nT, lT = lT, + gameticPartition = gameticPartition, warn = NULL ) ## names(ret)[nT+1] <- "info" diff --git a/R/RcppExports.R b/R/RcppExports.R index 71c5b38..bee3530 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,8 +1,8 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -AlphaPartDrop <- function(c1, c2, nI, nP, nT, ped, P, Px) { - .Call(`_AlphaPart_AlphaPartDrop`, c1, c2, nI, nP, nT, ped, P, Px) +AlphaPartDrop <- function(c1, c2, nI, nP, nT, nGP, ped, P, Px) { + .Call(`_AlphaPart_AlphaPartDrop`, c1, c2, nI, nP, nT, nGP, ped, P, Px) } AlphaPartDropGroup <- function(c1, c2, nI, nP, nT, nG, ped, P, Px, g) { diff --git a/R/methods.R b/R/methods.R index e484503..b1166ce 100644 --- a/R/methods.R +++ b/R/methods.R @@ -262,6 +262,7 @@ summary.AlphaPart <- function( nT = nT, lT = lT, by = by, + gameticPartition = object$info$gameticPartition, warn = object$info$warn, labelSum = labelSum ) @@ -275,6 +276,11 @@ summary.AlphaPart <- function( checkCov <- length(cols[-1]) > 1 ## do not run cov if path has 1 level paths <- cols paths[2:length(paths)] <- ret$info$lP + if (object$info$gameticPartition){ + cols <- c(cols, paste(lT[i], lP, sep="_f_"), paste(lT[i], lP, sep = "_m_")) # Review as not necessarily labelled as such? + paths <- cols + paths[2:length(paths)] <- paste(ret$info$lP, c(rep("i", nP), rep("f", nP), rep("m", nP)), sep= "_") + } paths[1] <- labelSum ## Summarize Variance Partitioning @@ -526,6 +532,8 @@ plot.summaryAlphaPart <- nP <- x$info$nP + x$info$nCov ret <- vector(mode = "list", length = nT) names(ret) <- x$info$lT + + ifelse(x$info$gameticPartition, nGP <- 3, nGP <- 1) ## Axis labels if (!is.null(xlab) && length(xlab) > 1) @@ -613,8 +621,8 @@ plot.summaryAlphaPart <- } ## Line type if (is.null(lineTypeList)) { - if (length(lineType) < nP) { - lineType <- c(1, rep(x = lineType, times = nP)) + if (length(lineType) < nP*nGP) { + lineType <- c(1, rep(x = lineType, times = nP*nGP)) } else { lineType <- c(1, lineType) } @@ -643,7 +651,7 @@ plot.summaryAlphaPart <- if (sortValue) { nC <- ncol(tmp0) pathStat <- sapply( - X = tmp0[, (nC - nP + 1):nC], + X = tmp0[, (nC - (nP*nGP) + 1):nC], FUN = sortValueFUN, na.rm = TRUE ) diff --git a/src/AlphaPartDrop.cpp b/src/AlphaPartDrop.cpp index 69eb6cf..1157894 100644 --- a/src/AlphaPartDrop.cpp +++ b/src/AlphaPartDrop.cpp @@ -2,17 +2,17 @@ using namespace Rcpp; // [[Rcpp::export]] -List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, +List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, NumericMatrix ped, IntegerVector P, IntegerVector Px) { // --- Temp --- - int i, j, t, p; + int i, j, t, p, pt, mt, k; // --- Outputs --- - NumericMatrix pa(nI+1, nT); // Parent average - NumericMatrix ms(nI+1, nT); // Mendelian sampling - NumericMatrix xa(nI+1, nP*nT); // Partitions + NumericMatrix pa(nI+1, nT*nGP); // Parent average + NumericMatrix ms(nI+1, nT*nGP); // Mendelian sampling + NumericMatrix xa(nI+1, nP*nT*nGP); // Partitions // NOTE: Rcpp::NumericMatrix is filled by 0s by default // TODO: Maybe we want an algorithm that works on one trait at a time to save on memory? @@ -31,18 +31,41 @@ List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, // Mendelian sampling (MS) ms(i, t) = ped(i, 3+t) - pa(i, t); + + // Gametic partition + if (nGP ==3) { + pt = t + nT; // paternal trait index + mt = t + 2*nT; // maternal trait index + pa(i, pt) = c1 * ped(ped(i,1), 3+pt) + c1 * ped(ped(i,1), 3+mt); + pa(i, mt) = c2 * ped(ped(i,2), 3+pt) + c2 * ped(ped(i,2), 3+mt); + ms(i, mt) = ped(i, 3+mt) - pa(i, mt); + ms(i, pt) = ped(i, 3+pt) - pa(i, pt); + } // Parts // ... for the MS part j = Px[t] + P[i]; xa(i, j) = ms(i, t); + + if (nGP == 3) { + j = Px[t] + P[i] + (nT*nP); + xa(i, j) = ms(i, pt); + j = Px[t] + P[i] + (nT*nP*2); + xa(i, j) = ms(i, mt); + } // ... for the PA parts for(p = 0; p < nP; p++) { j = Px[t] + p; xa(i, j) += c1 * xa(ped(i, 1), j) + c2 * xa(ped(i, 2), j); + if (nGP == 3) { + j = Px[t] + p + (nT*nP); + k = Px[t] + p + (nT*nP*2); + xa(i, j) += c1 * xa(ped(i, 1), j) + c1 * xa(ped(i, 1), k); + xa(i, k) += c2 * xa(ped(i, 2), j) + c2 * xa(ped(i, 2), k); + } } } } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 9d6d431..07a198a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,8 +11,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // AlphaPartDrop -List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, NumericMatrix ped, IntegerVector P, IntegerVector Px); -RcppExport SEXP _AlphaPart_AlphaPartDrop(SEXP c1SEXP, SEXP c2SEXP, SEXP nISEXP, SEXP nPSEXP, SEXP nTSEXP, SEXP pedSEXP, SEXP PSEXP, SEXP PxSEXP) { +List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, NumericMatrix ped, IntegerVector P, IntegerVector Px); +RcppExport SEXP _AlphaPart_AlphaPartDrop(SEXP c1SEXP, SEXP c2SEXP, SEXP nISEXP, SEXP nPSEXP, SEXP nTSEXP, SEXP nGPSEXP, SEXP pedSEXP, SEXP PSEXP, SEXP PxSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -21,10 +21,11 @@ BEGIN_RCPP Rcpp::traits::input_parameter< int >::type nI(nISEXP); Rcpp::traits::input_parameter< int >::type nP(nPSEXP); Rcpp::traits::input_parameter< int >::type nT(nTSEXP); + Rcpp::traits::input_parameter< int >::type nGP(nGPSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type ped(pedSEXP); Rcpp::traits::input_parameter< IntegerVector >::type P(PSEXP); Rcpp::traits::input_parameter< IntegerVector >::type Px(PxSEXP); - rcpp_result_gen = Rcpp::wrap(AlphaPartDrop(c1, c2, nI, nP, nT, ped, P, Px)); + rcpp_result_gen = Rcpp::wrap(AlphaPartDrop(c1, c2, nI, nP, nT, nGP, ped, P, Px)); return rcpp_result_gen; END_RCPP } @@ -50,7 +51,7 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_AlphaPart_AlphaPartDrop", (DL_FUNC) &_AlphaPart_AlphaPartDrop, 8}, + {"_AlphaPart_AlphaPartDrop", (DL_FUNC) &_AlphaPart_AlphaPartDrop, 9}, {"_AlphaPart_AlphaPartDropGroup", (DL_FUNC) &_AlphaPart_AlphaPartDropGroup, 10}, {NULL, NULL, 0} }; diff --git a/tests/testthat/test-plotSummaryAlphaPart.R b/tests/testthat/test-plotSummaryAlphaPart.R index 8b78364..4fd919a 100644 --- a/tests/testthat/test-plotSummaryAlphaPart.R +++ b/tests/testthat/test-plotSummaryAlphaPart.R @@ -10,7 +10,7 @@ test_that("Test plotSummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) sum <- summary(tmp) expect_error(plot.summaryAlphaPart(sum), "output is provided only when the 'by' argument is defined on the 'summary' function") diff --git a/tests/testthat/test-printAlphaPart.R b/tests/testthat/test-printAlphaPart.R index 256c7c1..d5a763c 100644 --- a/tests/testthat/test-printAlphaPart.R +++ b/tests/testthat/test-printAlphaPart.R @@ -9,10 +9,9 @@ test_that("Test print.AlphaPart", { trt1=c(100, 120, 115, 130, 125, 125), trt2=c(100, 110, 105, 100, 85, 110), gen=c( 1, 1, 2, 2, 3, 3)) - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) ## Partition additive genetic values - expect_equal(print(tmp$trt1[,"trt1_w"], digits=1), c(100,120,5,130,2.5,125)) - expect_equal(print.AlphaPart(AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE)),NULL) - expect_equal(print.AlphaPart(AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = TRUE)),NULL) + expect_equal(print(tmp$trt1[,"trt1_ms"], digits=1), c(100,120,5,130,2.5,125)) + expect_equal(print.AlphaPart(AlphaPart(x=ped, colBV=c("trt1", "trt2"))),NULL) expect_equal(print(tmp$info),tmp$info) }) diff --git a/tests/testthat/test-printPlotSummaryAlphaPart.R b/tests/testthat/test-printPlotSummaryAlphaPart.R index c1b0398..12f0cd9 100644 --- a/tests/testthat/test-printPlotSummaryAlphaPart.R +++ b/tests/testthat/test-printPlotSummaryAlphaPart.R @@ -9,7 +9,7 @@ test_that("Test print.PlotSummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV="trt1", center = FALSE) + tmp <- AlphaPart(x=ped, colBV="trt1") sum <- summary(tmp, by="gen") k <- print(plot.summaryAlphaPart(sum)) expect_equal(k,NULL) diff --git a/tests/testthat/test-printSummary-alphapart.R b/tests/testthat/test-printSummary-alphapart.R index e37f739..6b91b82 100644 --- a/tests/testthat/test-printSummary-alphapart.R +++ b/tests/testthat/test-printSummary-alphapart.R @@ -12,7 +12,7 @@ test_that("Test Printsummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) ## Test summary for trt1 expect_equal(print.AlphaPart(summary(tmp, by="gen")), NULL) }) diff --git a/tests/testthat/test-savePlotSummaryAlphaPart.R b/tests/testthat/test-savePlotSummaryAlphaPart.R index 16b20b4..9346860 100644 --- a/tests/testthat/test-savePlotSummaryAlphaPart.R +++ b/tests/testthat/test-savePlotSummaryAlphaPart.R @@ -10,7 +10,7 @@ test_that("Test savePlotSummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - m <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + m <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) sum <- summary(m, by="gen") p1 <- plot.summaryAlphaPart(sum) diff --git a/tests/testthat/test-summary-alphapart.R b/tests/testthat/test-summary-alphapart.R index 7e9dc74..fe65474 100644 --- a/tests/testthat/test-summary-alphapart.R +++ b/tests/testthat/test-summary-alphapart.R @@ -12,7 +12,7 @@ test_that("Test summary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) ## ## Trait: trt1 ## @@ -94,7 +94,7 @@ test_that("Test summary.AlphaPart", { ## Test the direct use of by group analysis in the AlphaPart function ped$gen <- factor(ped$gen) tmp1 <- summary(AlphaPart(x=ped, colBV=c("trt1", "trt2")), by="gen") - tmp2 <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), colBy="gen", center=FALSE) + tmp2 <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), colBy="gen") expect_equal(tmp1, tmp2) }) From 395c68e100dbd4a5cd24d5e60cfb879157dceffc Mon Sep 17 00:00:00 2001 From: RosCraddock Date: Thu, 30 Oct 2025 21:22:09 +0000 Subject: [PATCH 02/10] Update documentation --- man/AlphaPart.Rd | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/man/AlphaPart.Rd b/man/AlphaPart.Rd index 5a1ad11..3eb9603 100644 --- a/man/AlphaPart.Rd +++ b/man/AlphaPart.Rd @@ -17,6 +17,8 @@ AlphaPart( colMid = 3, colPath = 4, colBV = 5:ncol(x), + colPaternalBV = NULL, + colMaternalBV = NULL, colBy = NULL ) } @@ -68,7 +70,13 @@ ication if \code{pedType="IPG"} .} holding path information.} \item{colBV}{Numeric or character, position(s) or name(s) of -column(s) holding genetic Values.} +column(s) holding genetic values.} + +\item{colPaternalBV}{Numeric or character, position(s) of column(s) +holding the paternal genetic values for IBD partitioning.} + +\item{colMaternalBV}{Numeric or character, position(s) of column(s) +holding the maternal genetic values for IBD partitioning..} \item{colBy}{Numeric or character, position or name of a column holding group information (see details).} From e9221947e3f8fb6c19b66e3163aa6bddfe57da70 Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Thu, 30 Oct 2025 23:01:06 +0000 Subject: [PATCH 03/10] ibd corrections - column names - handling multiple traits --- R/AlphaPart.R | 23 +++++++++++++++-------- R/methods.R | 5 ----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/R/AlphaPart.R b/R/AlphaPart.R index bc5d2db..f818c5c 100644 --- a/R/AlphaPart.R +++ b/R/AlphaPart.R @@ -181,9 +181,7 @@ AlphaPart <- function( length(colFid) > 1 | length(colMid) > 1 | length(colPath) > 1 | - length(colBy) > 1) | - length(colPaternalBV) > 1 | - length(colMaternalBV) > 1 + length(colBy) > 1) if (test) { stop( "arguments 'colId', 'colFid', 'colMid', 'colPath', and 'colBy' must be of length 1" @@ -531,10 +529,15 @@ AlphaPart <- function( colnames(tmp$ms) <- paste(lT, "_ms", sep = "") colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) } else { - lPT <- colnames(x[, c(colBV, colPaternalBV, colMaternalBV), drop=FALSE]) + #lPT <- colnames(x[, c(colBV, colPaternalBV, colMaternalBV), drop=FALSE]) + lP <- c(lP, paste0(lP, "_paternal"), paste0(lP, "_maternal")) + lPT <- paste0( + rep(lT, each = 3), + c("", "_paternal", "_maternal") + ) colnames(tmp$pa) <- paste(lPT, "_pa", sep="") colnames(tmp$ms) <- paste(lPT, "_ms", sep="") - colnames(tmp$xa) <- c(t(outer(lPT, lP, paste, sep = "_"))) + colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) } @@ -557,12 +560,16 @@ AlphaPart <- function( t <- max(Py) } } else { + w <- 1 + t <- 0 for (j in 1:nT) { ## j <- 1 - Py <- c(j-1+seq(t+1, t+nP), j-1+nT*nP+seq(t+1, t+nP), j-1+2*nT*nP+seq(t+1, t+nP)) - ret[[j]] <- cbind(tmp$pa[-1, j], tmp$ms[-1, j], tmp$xa[-1, Py]) - colnames(ret[[j]]) <- c(colP[j], colM[j], colX[Py]) + Py <- seq(t+1, j*nGP*nP) + pams <- c(w, w+1, w+2) + ret[[j]] <- cbind(tmp$pa[-1, pams], tmp$ms[-1, pams], tmp$xa[-1, Py]) + colnames(ret[[j]]) <- c(colP[pams], colM[pams], colX[Py]) t <- max(Py) + w <- w + 3 } } diff --git a/R/methods.R b/R/methods.R index b1166ce..046dec6 100644 --- a/R/methods.R +++ b/R/methods.R @@ -276,11 +276,6 @@ summary.AlphaPart <- function( checkCov <- length(cols[-1]) > 1 ## do not run cov if path has 1 level paths <- cols paths[2:length(paths)] <- ret$info$lP - if (object$info$gameticPartition){ - cols <- c(cols, paste(lT[i], lP, sep="_f_"), paste(lT[i], lP, sep = "_m_")) # Review as not necessarily labelled as such? - paths <- cols - paths[2:length(paths)] <- paste(ret$info$lP, c(rep("i", nP), rep("f", nP), rep("m", nP)), sep= "_") - } paths[1] <- labelSum ## Summarize Variance Partitioning From e7b425f3ace354a8a8b4210d0e6482a48db4285f Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Fri, 31 Oct 2025 10:10:30 +0000 Subject: [PATCH 04/10] More corrections - multiple traits with IBD partitioning --- R/AlphaPart.R | 10 ++-------- src/AlphaPartDrop.cpp | 14 +++++++------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/R/AlphaPart.R b/R/AlphaPart.R index f818c5c..b0a722f 100644 --- a/R/AlphaPart.R +++ b/R/AlphaPart.R @@ -531,15 +531,11 @@ AlphaPart <- function( } else { #lPT <- colnames(x[, c(colBV, colPaternalBV, colMaternalBV), drop=FALSE]) lP <- c(lP, paste0(lP, "_paternal"), paste0(lP, "_maternal")) - lPT <- paste0( - rep(lT, each = 3), - c("", "_paternal", "_maternal") - ) + lPT <- unlist(lapply(c("","_paternal", "_maternal"), function(s) paste0(lT, s))) colnames(tmp$pa) <- paste(lPT, "_pa", sep="") colnames(tmp$ms) <- paste(lPT, "_ms", sep="") colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) } - ## --- Massage results --- @@ -560,16 +556,14 @@ AlphaPart <- function( t <- max(Py) } } else { - w <- 1 t <- 0 for (j in 1:nT) { ## j <- 1 Py <- seq(t+1, j*nGP*nP) - pams <- c(w, w+1, w+2) + pams <- c(j, j+nT, j+2*nT) ret[[j]] <- cbind(tmp$pa[-1, pams], tmp$ms[-1, pams], tmp$xa[-1, Py]) colnames(ret[[j]]) <- c(colP[pams], colM[pams], colX[Py]) t <- max(Py) - w <- w + 3 } } diff --git a/src/AlphaPartDrop.cpp b/src/AlphaPartDrop.cpp index 1157894..e519552 100644 --- a/src/AlphaPartDrop.cpp +++ b/src/AlphaPartDrop.cpp @@ -22,7 +22,7 @@ List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, // https://github.com/AlphaGenes/AlphaPart/issues/13 // --- Compute --- - + // review for multiple traits with gametic partitioning for(i = 1; i < nI+1; i++) { for(t = 0; t < nT; t++) { // Parent average (PA) @@ -45,24 +45,24 @@ List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, // Parts // ... for the MS part - j = Px[t] + P[i]; + j = nGP*Px[t] + P[i]; xa(i, j) = ms(i, t); if (nGP == 3) { - j = Px[t] + P[i] + (nT*nP); + j = nGP*Px[t] + P[i] + nP; xa(i, j) = ms(i, pt); - j = Px[t] + P[i] + (nT*nP*2); + j = nGP*Px[t] + P[i] + nP*2; xa(i, j) = ms(i, mt); } // ... for the PA parts for(p = 0; p < nP; p++) { - j = Px[t] + p; + j = nGP*Px[t] + p; xa(i, j) += c1 * xa(ped(i, 1), j) + c2 * xa(ped(i, 2), j); if (nGP == 3) { - j = Px[t] + p + (nT*nP); - k = Px[t] + p + (nT*nP*2); + j = nGP*Px[t] + p + nP; + k = nGP*Px[t] + p + nP*2; xa(i, j) += c1 * xa(ped(i, 1), j) + c1 * xa(ped(i, 1), k); xa(i, k) += c2 * xa(ped(i, 2), j) + c2 * xa(ped(i, 2), k); } From 5a8a36c92bd7b43c8bb2df3c4318f30a9664c8e2 Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Fri, 31 Oct 2025 10:11:28 +0000 Subject: [PATCH 05/10] completed to do --- src/AlphaPartDrop.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/AlphaPartDrop.cpp b/src/AlphaPartDrop.cpp index e519552..eaac10f 100644 --- a/src/AlphaPartDrop.cpp +++ b/src/AlphaPartDrop.cpp @@ -22,7 +22,6 @@ List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, // https://github.com/AlphaGenes/AlphaPart/issues/13 // --- Compute --- - // review for multiple traits with gametic partitioning for(i = 1; i < nI+1; i++) { for(t = 0; t < nT; t++) { // Parent average (PA) From acc0045e03aa4de3e2aa24ab036967f35ad3e38c Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Fri, 31 Oct 2025 11:54:59 +0000 Subject: [PATCH 06/10] Test for IBD - test for IBD (single trait) - commenting break downs the theory --- tests/testthat/test-alphapart.R | 124 ++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/tests/testthat/test-alphapart.R b/tests/testthat/test-alphapart.R index e170522..7fa2334 100644 --- a/tests/testthat/test-alphapart.R +++ b/tests/testthat/test-alphapart.R @@ -764,3 +764,127 @@ test_that("Test computation - 2nd example", { expect_true(part$trait1$trait1_domestic[part$trait1$id == "V"] == 68.875) expect_true(part$trait1$trait1_import[part$trait1$id == "V"] == 40.125) }) + +test_that("Test IBD computation, one trait", { + # dput(AlphaPart.ped) so we have a fixed dataset + dat <- structure( + list( + id = structure( + c(1L, 2L, 3L, 4L, 5L, 6L, 7L), + levels = c("1", "2", "3", "4", "5", "6", "7"), + class = "factor" + ), + father = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "1", "4", "3"), + class = "factor" + ), + mother = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "2", "5", "6"), + class = "factor" + ), + generation = c(1, 1, 2, 1, 1, 2, 3), + group = structure( + c(1L, 1L, 1L, 2L, 2L, 2L, 2L), + levels = c("1", "2"), + class = "factor" + ), + trait1 = c(1, 2, 1.5, 0, 2, 1, 1.25), + trait1_paternal = c(0.5, 1, 0.5, 0, 1, 0, 0.5), + trait1_maternal = c(0.5, 1, 1, 0, 1, 1, 0.75) + ), + row.names = c(NA, -7L), + class = "data.frame" + ) + part <- AlphaPart(x = dat, colPath = "group", colBV = "trait1", + colPaternalBV = "trait1_paternal", + colMaternalBV = "trait1_maternal", verbose = 0) + + # group 1 is blue and group 2 is red + # a_1 & = 0 + \color{blue}{r_1} \\ + # & = \color{blue}{1} = 1 \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "1"] == 1) + expect_true(part$trait1$trait1_2[part$trait1$id == "1"] == 0) + # a_paternal_1 & = 0 + \color{blue}{r_paternal_1} \\ + # & = \color{blue}{0.5} = 0.5 \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "1"] == 0.5) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "1"] == 0) + # a_maternal_1 & = 0 + \color{blue}{r_maternal_1} \\ + # & = \color{blue}{0.5} = 0.5 \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "1"] == 0.5) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "1"] == 0) + # a_2 & = 0 + \color{blue}{r_2} \\ + # & = \color{blue}{2} = 2 \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "2"] == 2) + expect_true(part$trait1$trait1_2[part$trait1$id == "2"] == 0) + # a_paternal_2 & = 0 + \color{blue}{r_paternal_2} \\ + # & = \color{blue}{1} = 1 \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "2"] == 1) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "2"] == 0) + # a_maternal_2 & = 0 + \color{blue}{r_maternal_2} \\ + # & = \color{blue}{1} = 1 \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "2"] == 1) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "2"] == 0) + # a_3 & = \color{blue}{\frac{1}{2} r_1} + \color{blue}{\frac{1}{2} r_2} + + # \color{blue{r_3} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "3"] == 1.50) + expect_true(part$trait1$trait1_2[part$trait1$id == "3"] == 0) + # a_paternal_3 & = \color{blue}{\frac{1}{2} r_paternal_1} + \color{blue}{\frac{1}{2} r_maternal_1} \\ + # + \color{blue}{r_paternal_3} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "3"] == 0.5) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "3"] == 0) + # a_maternal_3 & = \color{blue}{\frac{1}{2} r_paternal_2} + \color{blue}{\frac{1}{2} r_maternal_2} \\ + # + \color{blue}{r_maternal_3} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "3"] == 1) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "3"] == 0) + # a_4 & = 0 + \color{red}{r_4} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "4"] == 0) + expect_true(part$trait1$trait1_2[part$trait1$id == "4"] == 0) + # a_paternal_4 & = 0 + \color{red}{r_paternal_4} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "4"] == 0) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "4"] == 0) + # a_maternal_4 & = 0 + \color{red}{r_maternal_4} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "4"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "4"] == 0) + # a_5 & = 0 + \color{red}{r_5} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "5"] == 0) + expect_true(part$trait1$trait1_2[part$trait1$id == "5"] == 2) + # a_paternal_5 & = 0 + \color{red}{r_paternal_5} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "5"] == 0) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "5"] == 1) + # a_maternal_5 & = 0 + \color{red}{r_maternal_5} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "5"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "5"] == 1) + # a_6 & = \color{red}{\frac{1}{2} r_4} + \color{red}{\frac{1}{2} r_5 + + # \color{red}{r_6} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "6"] == 0) + expect_true(part$trait1$trait1_2[part$trait1$id == "6"] == 1) + # a_paternal_6 & = \color{red}{\frac{1}{2} r_paternal_4} + \color{red}{\frac{1}{2} r_maternal_4} \\ + # + \color{red}{r_paternal_6} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "6"] == 0) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "6"] == 0) + # a_maternal_6 & = \color{red}{\frac{1}{2} r_paternal_5} + \color{red{\frac{1}{2} r_maternal_5} \\ + # + \color{blue}{r_maternal_6} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "6"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "6"] == 1) + # a_7 & = \color{blue}{\frac{1}{4} r_1} + \color{blue}{\frac{1}{4} r_2} + + # \color{red{\frac{1}{4} r_4} + \color{red}{\frac{1}{4} r_5} + + # \color{blue}{\frac{1}{2} r_3} + \color{red}{\frac{1}{2} r_6 + + # \color{red}{r_7} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "7"] == 0.75) + expect_true(part$trait1$trait1_2[part$trait1$id == "7"] == 0.5) + # a_paternal_7 & = \color{blue}{\frac{1}{4} r_paternal_1} + \color{blue}{\frac{1}{4} r_maternal_1} \\ + # + \color{blue}{\frac{1}{4} r_paternal_2} + \color{blue}{\frac{1}{4} r_maternal_2} \\ + # + \color{blue}{\frac{1}{2} r_paternal_3} + \color{blue}{\frac{1}{2} r_maternal_3} \\ + # + \color{red}{r_paternal_7} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "7"] == 0.75) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "7"] == -0.25) + # a_maternal_7 & = \color{red}{\frac{1}{4} r_paternal_4} + \color{red}{\frac{1}{4} r_maternal_4} \\ + # + \color{red}{\frac{1}{4} r_paternal_5} + \color{red}{\frac{1}{4} r_maternal_5} \\ + # + \color{red}{\frac{1}{2} r_paternal_6} + \color{red}{\frac{1}{2} r_maternal_6} \\ + # + \color{red}{r_maternal_7} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "7"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "7"] == 0.75) +}) + From a3cb03333a3f9ddfc841a8796e6ab6de431992f5 Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Fri, 31 Oct 2025 12:00:26 +0000 Subject: [PATCH 07/10] Test for multiple traits --- tests/testthat/test-alphapart.R | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/testthat/test-alphapart.R b/tests/testthat/test-alphapart.R index 7fa2334..597da4c 100644 --- a/tests/testthat/test-alphapart.R +++ b/tests/testthat/test-alphapart.R @@ -888,3 +888,64 @@ test_that("Test IBD computation, one trait", { expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "7"] == 0.75) }) +test_that("Test IBD computation, one trait", { + # dput(AlphaPart.ped) so we have a fixed dataset + dat <- structure( + list( + id = structure( + c(1L, 2L, 3L, 4L, 5L, 6L, 7L), + levels = c("1", "2", "3", "4", "5", "6", "7"), + class = "factor" + ), + father = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "1", "4", "3"), + class = "factor" + ), + mother = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "2", "5", "6"), + class = "factor" + ), + generation = c(1, 1, 2, 1, 1, 2, 3), + group = structure( + c(1L, 1L, 1L, 2L, 2L, 2L, 2L), + levels = c("1", "2"), + class = "factor" + ), + trait1 = c(1, 2, 1.5, 0, 2, 1, 1.25), + trait1_paternal = c(0.5, 1, 0.5, 0, 1, 0, 0.5), + trait1_maternal = c(0.5, 1, 1, 0, 1, 1, 0.75), + trait2 = c(1, 2, 1.5, 0, 2, 1, 1.25), + trait2_paternal = c(0.5, 1, 0.5, 0, 1, 0, 0.5), + trait2_maternal = c(0.5, 1, 1, 0, 1, 1, 0.75) + ), + row.names = c(NA, -7L), + class = "data.frame" + ) + part <- AlphaPart(x = dat, colPath = "group", colBV = c("trait1", "trait2"), + colPaternalBV = c("trait1_paternal", "trait2_paternal"), + colMaternalBV = c("trait1_maternal", "trait2_maternal"), verbose = 0) + + # group 1 is blue and group 2 is red + # Just test second trait on individual 7 + # a_7 & = \color{blue}{\frac{1}{4} r_1} + \color{blue}{\frac{1}{4} r_2} + + # \color{red{\frac{1}{4} r_4} + \color{red}{\frac{1}{4} r_5} + + # \color{blue}{\frac{1}{2} r_3} + \color{red}{\frac{1}{2} r_6 + + # \color{red}{r_7} \\ + expect_true(part$trait2$trait2_1[part$trait2$id == "7"] == 0.75) + expect_true(part$trait2$trait2_2[part$trait2$id == "7"] == 0.5) + # a_paternal_7 & = \color{blue}{\frac{1}{4} r_paternal_1} + \color{blue}{\frac{1}{4} r_maternal_1} \\ + # + \color{blue}{\frac{1}{4} r_paternal_2} + \color{blue}{\frac{1}{4} r_maternal_2} \\ + # + \color{blue}{\frac{1}{2} r_paternal_3} + \color{blue}{\frac{1}{2} r_maternal_3} \\ + # + \color{red}{r_paternal_7} \\ + expect_true(part$trait2$trait2_1_paternal[part$trait2$id == "7"] == 0.75) + expect_true(part$trait2$trait2_2_paternal[part$trait2$id == "7"] == -0.25) + # a_maternal_7 & = \color{red}{\frac{1}{4} r_paternal_4} + \color{red}{\frac{1}{4} r_maternal_4} \\ + # + \color{red}{\frac{1}{4} r_paternal_5} + \color{red}{\frac{1}{4} r_maternal_5} \\ + # + \color{red}{\frac{1}{2} r_paternal_6} + \color{red}{\frac{1}{2} r_maternal_6} \\ + # + \color{red}{r_maternal_7} \\ + expect_true(part$trait2$trait2_1_maternal[part$trait2$id == "7"] == 0) + expect_true(part$trait2$trait2_2_maternal[part$trait2$id == "7"] == 0.75) +}) + From 222003758c9a74279c3538beff853f43d83f5d4d Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Fri, 31 Oct 2025 12:02:20 +0000 Subject: [PATCH 08/10] minor --- tests/testthat/test-alphapart.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-alphapart.R b/tests/testthat/test-alphapart.R index 597da4c..6260415 100644 --- a/tests/testthat/test-alphapart.R +++ b/tests/testthat/test-alphapart.R @@ -888,7 +888,7 @@ test_that("Test IBD computation, one trait", { expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "7"] == 0.75) }) -test_that("Test IBD computation, one trait", { +test_that("Test IBD computation, two traits", { # dput(AlphaPart.ped) so we have a fixed dataset dat <- structure( list( From 0edf1cc3b7d2f62faa6a64f06796c3f54568b174 Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Tue, 2 Dec 2025 16:00:40 +0000 Subject: [PATCH 09/10] ibd vignette and minor update - First draft of ibd vignette to demonstrate ibd partitioning in AlphaPart. - minor update to c++ code. --- src/AlphaPartDrop.cpp | 2 +- vignettes/ibd.Rmd | 153 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 4 deletions(-) diff --git a/src/AlphaPartDrop.cpp b/src/AlphaPartDrop.cpp index eaac10f..3b6a821 100644 --- a/src/AlphaPartDrop.cpp +++ b/src/AlphaPartDrop.cpp @@ -6,7 +6,7 @@ List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, NumericMatrix ped, IntegerVector P, IntegerVector Px) { // --- Temp --- - int i, j, t, p, pt, mt, k; + int i, j, t, p, pt=0, mt=0, k; // --- Outputs --- diff --git a/vignettes/ibd.Rmd b/vignettes/ibd.Rmd index b9ada40..30d62eb 100644 --- a/vignettes/ibd.Rmd +++ b/vignettes/ibd.Rmd @@ -1,6 +1,6 @@ --- title: "AlphaPart: Partitioning with idenity-by-descent (IBD) information" -author: "Ros Craddock and Gregor Gorjanc" +author: "Rosalind Craddock and Gregor Gorjanc" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > @@ -12,14 +12,161 @@ vignette: > ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` -TODO: Implement gametic/IBD/phased-based partitioning #18 - https://github.com/AlphaGenes/AlphaPart/issues/18 ## Introduction +Through application of the gametes model into AlphaPart, it is possible to partition genetic values (such as phased genotype probabilities) on identity-by-descent (IBD) information. This allows the partitioning of genetic values into contributions from individual haplotypes or chromosome segments to leverage phased genotype data. Thus, providing further insights into the sources of genetic variation within a population. This vignette will demonstrate how to use AlphaPart for partitioning with IBD information and, briefly, how to interpret the results. * TODO: Cite Ros's paper on partitioning with IBD information #44 https://github.com/AlphaGenes/AlphaPart/issues/44 +## Load packages +```{r} +rm(list=ls()) +library(AlphaPart) +``` + +## Data preparation +To demonstrate partitioning with IBD information, we will use a simulated dataset with id, mother id, father id, and phased genotype probabilities for each individual. For this example, we have simulated 10 individuals, all with phased genotypes at a single locus (e.g., a SNP). This is for demonstration purpose only; in practice, you would use a larger datasets to achieve different results to without IBD information. + +Note here that we include one UPG to handle the non-zero mean of the allele dosages in the founders. For more information on this, please refer to the `founders.rmd` vignette. + +### Dataset +```{r} +ped <- data.frame( + id = c("UPG1", 1:10), + fid = c(0, "UPG1", "UPG1", "UPG1", "UPG1", "UPG1", 2, 4, 4, 7, 8), + mid = c(0, "UPG1", "UPG1", "UPG1", "UPG1", "UPG1", 1, 3, 5, 6, 9), + group = c("UPG1", "female", "male", "female", "male", "female", "female", "male", "male", "female", "female"), + gen = c(0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4), + locus1_geno_probAA = c(NA, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0), + locus1_geno_probBA = c(NA, 0.5, 0.5, 0, 0.5, 0, 0, 1, 1, 1, 0), + locus1_geno_probAB = c(NA, 0.5, 0.5, 0, 0.5, 0, 0, 0, 0, 0, 0), + locus1_geno_probBB = c(NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) +) + +ped +``` + +### Reorganise data for AlphaPart +There are two steps to reorganise the data for AlphaPart: +1. Convert the phased genotype probabilities into allele dosages (or allele frequencies, if preferred). Call this column locus1_geno +2. Convert the phased genotype probabilities into two columns: paternal_BV and maternal_BV, representing the contributions from the paternal and maternal haplotypes, respectively. + +```{r} +# Step 1: Calculate allele dosages +ped$locus1_geno <- with(ped, + locus1_geno_probAA * 0 + + locus1_geno_probBA * 1 + + locus1_geno_probAB * 1 + + locus1_geno_probBB * 2 +) + +# Estimate for the UPG using the mean of the founders +ped$locus1_geno[ped$id == "UPG1"] <- mean(ped$locus1_geno[which(ped$gen == 1)], na.rm = TRUE) +ped +``` +```{r} +# Step 2: Calculate paternal and maternal contributions +ped$locus1_paternal <- with(ped, + locus1_geno_probAA * 0 + + locus1_geno_probBA * 1 + + locus1_geno_probAB * 0 + + locus1_geno_probBB * 1 +) +ped$locus1_maternal <- with(ped, + locus1_geno_probAA * 0 + + locus1_geno_probBA * 0 + + locus1_geno_probAB * 1 + + locus1_geno_probBB * 1 +) + +# For UPG, just half the allele dosage +ped$locus1_paternal[ped$id == "UPG1"] <- ped$locus1_geno[ped$id == "UPG1"] / 2 +ped$locus1_maternal[ped$id == "UPG1"] <- ped$locus1_geno[ped$id == "UPG1"] / 2 + +ped +``` + +## Partitioning with AlphaPart +Now that the data is prepared, we can use AlphaPart to partition by IBD. Compared to the standard AlphaPart, this requires two additional arguments to be defined: +1. colPaternalBV: the column name or number representing the paternal haplotype contributions +2. colMaternalBV: the column name or number representing the maternal haplotype contributions + +```{r} +part_ibd <- AlphaPart( + x = ped[c(1:5,10:12)], + colId = "id", + colFid = "fid", + colMid = "mid", + colPath = "group", + colBV = "locus1_geno", + colPaternalBV = "locus1_paternal", + colMaternalBV = "locus1_maternal" +) + +part_ibd[["locus1_geno"]] + +``` +Both results for standard partitioning and partitioning using IBD information are recorded within the same table for trait "locus1_geno". There are results for the parent average, the Mendelian sampling term, and finally the partitions for both standard and IBD partitioning. In the next section, we will use these results to summarise and plot the contributions over generations and briefly discuss interpretation. + +## Interpretting the results + +### Plot the results by generation first without IBD information +```{r} +part_ibdSum <- summary(part_ibd, by = "gen") +# First lets plot the standard partitioning without IBD information +part_stand_sum <- part_ibdSum[["locus1_geno"]][c(1,3,4,5,6)] +# Convert to long format +part_stand_long <- reshape2::melt(part_stand_sum, id.vars = "gen") +# Plot standard partitioning +library(ggplot2) +standardPlot <- ggplot(part_stand_long, aes(x = gen, y = value, color = variable)) + + geom_line(size = 1) + + labs(title = "Standard Partitioning without IBD information", + x = "Generation", + y = "Allele Dosage Contribution") + + theme_minimal() + + scale_colour_manual(values = c("black", "#DC697D" ,"#90D5FF", "#FFB14E" )) +plot(standardPlot) +``` +As we only have one UPG, the UPG contribution is the same throughout all generations as shown in the yellow. The within pedigree partitions for male (blue) and female (pink) contributions change over time. All three partitions sum up to give the overall change in allele dosage across generations (in black). In generation 1 and 2, the male contributes more to the allele dosage compared to females since all alternative alleles are found in males. In generation 3, there is one individual who is female with allele dosage 1 and Mendelian sampling term of 0.5. This increases the female contribution to reduce the distance between males and females. The male contribution is only marginally reduced in generation 3 due to contributions from the sire (Mendelian sampling term of 0.5) and grandsire (Mendelain sampling term of 0.4) for the alternative allele. In generation 4, the female contribution is now larger than the male contribution because individual 10 is female and has a large Mendelian sampling term of 1. + +### Plot the results by generation with IBD information +```{r} +part_ibd_only <- part_ibdSum[["locus1_geno"]][c(1,3,7:12)] +# Convert to long format +part_ibd_long <- reshape2::melt(part_ibd_only, id.vars = "gen") +# Plot IBD partitioning +ibdPlot <- ggplot(part_ibd_long, aes(x = gen, y = value, color = variable)) + + geom_line(size = 1) + + labs(title = "Partitioning with IBD information", + x = "Generation", + y = "Allele Dosage Contribution") + + theme_minimal() + + scale_colour_manual(values = c("black", "#FFCCDE", "#C4FFFF", "#FFFFAF", "#FF70A3", "#00AEFF", "#FFE313")) +plot(ibdPlot) +``` +When partitioning with IBD information we double the number of partitions compared to standard partitioning. This is because we now have separate contributions from paternal and maternal haplotypes for each group. The black line is the same as before and is equivalent to the sum of all the six IBD partitions. Both UPG_paternal and UPG_maternal are equivalent as expected with a value of 0.3 in all generations. Generally the female_maternal path is negative, with the exception of generation 4. The male_paternal partition has a consistently high contribution across all generations. + +### Compare both without and with IBD information +```{r} +# Plot both graphs side by side +part_ibd_long$type <- "With IBD" +part_stand_long$type <- "Without IBD" +combined_part <- rbind(part_ibd_long, part_stand_long) +combinedPlot <- ggplot(combined_part, aes(x = gen, y = value, color = variable)) + + geom_line(size = 1) + + labs(title = "Partitioning Comparison", + x = "Generation", + y = "Allele Dosage Contribution") + + theme_minimal() + + scale_colour_manual(values = c("black", "#DC697D" ,"#90D5FF", "#FFB14E" , + "#FFCCDE", "#C4FFFF", "#FFFFAF", "#FF70A3", "#00AEFF", "#FFE313")) + + facet_wrap(~type) +plot(combinedPlot) +``` +When visualised side by side, its clear to see that the male partition (on the right) is largely driven by the paternal segregation of the alternative allele (on the left). The same is observed in the female partition (right) which is largely driven by the maternal segregation of the normal allele (left). Given in this simplistic example all males are sires and all females (except one) are dams, this is expected. In practice, with larger datasets, multiple loci, and more complex pedigrees and/or grouping, the IBD partitioning can provide further insights into the sources of genetic variation within a population. With multiple loci, the described process is repeated across each locus. Thus, each locus is assumed independent when partitioning in AlphaPart but the results can be aggregated across loci post-partitioning. + ## References * TODO: Cite Ros's paper on partitioning with IBD information #44 From 5497806240901961b3bf44c3d071608cbafa5c9b Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:13:44 +0000 Subject: [PATCH 10/10] Update to error chandling Small update in error handling for ibd. Otherwise, an error would be flagged unnecessarily. Tested and working on real data. --- R/AlphaPart.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AlphaPart.R b/R/AlphaPart.R index b0a722f..8497064 100644 --- a/R/AlphaPart.R +++ b/R/AlphaPart.R @@ -310,7 +310,7 @@ AlphaPart <- function( if (any(test)){ stop("colPaternalBV and colMaternalBV must be of the same length as colBV!") } - test <- any(x[, c(colPaternalBV)] + x[, c(colMaternalBV)] != x[, c(colBV)]) + test <- any(abs(x[, c(colBV)] - x[, c(colPaternalBV)] - x[, c(colMaternalBV)]) > 1e-8) if (any(test)){ stop("The sum of colPaternalBV and colMaternalBV must be equal to colBV for each individual!") }