From d40d97d6a498b1f313d6eef8b6606d2dba46b865 Mon Sep 17 00:00:00 2001 From: Sam Rogers <7007561+rogerssam@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:10:43 +1030 Subject: [PATCH 1/8] Updating transfer packages and gh_packages --- R/gh_packages.R | 80 ++++++++++++++++++++++++++++++++++++------- R/transfer_packages.R | 43 +++++++++++++++-------- 2 files changed, 95 insertions(+), 28 deletions(-) diff --git a/R/gh_packages.R b/R/gh_packages.R index 82e2eed..1e9be4f 100644 --- a/R/gh_packages.R +++ b/R/gh_packages.R @@ -1,20 +1,74 @@ -#' Checks if a package was installed from GitHub +#' Identify packages installed from remote sources +#' +#' This function checks which packages in the provided list were installed from +#' remote sources (GitHub, GitLab, etc.) rather than CRAN, and returns their +#' remote source information. #' #' @param pkg A character vector of package names to check. #' -#' @return A dataframe containing packages installed from remote sources, with the type of remote, account name and repo name. +#' @return A data frame containing packages installed from remote sources with columns: +#' \itemize{ +#' \item \code{type}: The type of remote source (e.g., "github", "gitlab") +#' \item \code{account}: The account/username of the remote repository +#' \item \code{repo}: The repository name (same as package name) +#' } +#' Returns an empty data frame if no packages are from remote sources. +#' +#' @details +#' The function examines the DESCRIPTION file of each package to determine if it +#' was installed from a remote source. Packages installed from CRAN or other +#' standard repositories will not have RemoteType information. +#' #' @keywords internal #' +#' @examples +#' \dontrun{ +#' # Check if specific packages are from GitHub +#' gh_packages(c("ggplot2", "devtools", "BiometryTools")) +#' +#' # Check all installed packages +#' gh_packages(rownames(installed.packages())) +#' } +#' gh_packages <- function(pkg) { - list <- which(sapply(pkg, function(x) !is.null(packageDescription(x)$RemoteType))) - results <- data.frame() - if (length(list) > 0) { - type <- sapply(pkg[list], function(x) packageDescription(x)$RemoteType) - username <- sapply(pkg[list], function(x) ifelse(is.null(packageDescription(x)$RemoteUsername), NA, packageDescription(x)$RemoteUsername)) - repo <- names(list) - results <- data.frame(type, account = username, repo) - results <- subset(results, type != "standard") - rownames(results) <- NULL - } - return(results) + # Input validation + if (!is.character(pkg) || length(pkg) == 0) { + warning("'pkg' must be a non-empty character vector") + return(data.frame(type = character(), account = character(), repo = character())) + } + + # Get package descriptions once and store them + descriptions <- lapply(pkg, function(x) { + tryCatch(packageDescription(x), + error = function(e) NULL, + warning = function(w) NULL) + }) + names(descriptions) <- pkg + + # Filter to only packages with remote information + remote_packages <- descriptions[!sapply(descriptions, function(desc) { + is.null(desc) || is.null(desc$RemoteType) + })] + + # Return empty data frame if no remote packages found + if (length(remote_packages) == 0) { + return(data.frame(type = character(), account = character(), repo = character())) + } + + # Extract information for remote packages + results <- data.frame( + type = sapply(remote_packages, function(desc) desc$RemoteType %||% NA_character_), + account = sapply(remote_packages, function(desc) desc$RemoteUsername %||% NA_character_), + repo = names(remote_packages), + stringsAsFactors = FALSE + ) + + # Filter out "standard" type packages and reset row names + results <- results[results$type != "standard" & !is.na(results$type), ] + rownames(results) <- NULL + + return(results) } + +# Helper function for null coalescing (similar to %||% in rlang) +`%||%` <- function(x, y) if (is.null(x)) y else x diff --git a/R/transfer_packages.R b/R/transfer_packages.R index c4524bc..a988007 100644 --- a/R/transfer_packages.R +++ b/R/transfer_packages.R @@ -1,21 +1,34 @@ -#' Easily reinstall all currently installed packages on another machine or version of R. +#' Easily reinstall all currently installed packages on another machine or version of R #' #' @param library The location of the library on the current machine to copy. -#' @param output One of `online` (the default), `gist` or `local`. Saves a list of installed packages to the chosen location, and provides instructions on how to use this to (re)install the packages elsewhere. See details for more information. -#' @param expiry Expiry for online file store in days. Weeks can be given with `w`, or months with `m`. Default is 7 days. Will be ignored if `output` is not `online`. -#' @param filename Filename for the local output file. Ignored if `output` is not set to `local`. -#' @param list_remotes Logical (default `TRUE`). Check for any packages installed from repositories other than CRAN, and output instructions to reinstall. +#' @param output One of `online` (the default), `gist` or `local`. Saves a list of +#' installed packages to the chosen location, and provides instructions on how to +#' use this to (re)install the packages elsewhere. See details for more information. +#' @param expiry Expiry for online file store in days. Weeks can be given with `w`, +#' or months with `m`. Default is 7 days. Will be ignored if `output` is not `online`. +#' @param filename Filename for the local output file. Ignored if `output` is not +#' set to `local`. +#' @param list_remotes Logical (default `TRUE`). Check for any packages installed +#' from repositories other than CRAN, and output instructions to reinstall. #' @param quiet Logical (default `FALSE`). Suppress output if `TRUE`. #' #' @importFrom httr POST content #' @importFrom crayon green blue #' @importFrom glue glue glue_col glue_collapse single_quote #' -#' @details If `output` is `online`, the resulting list of currently installed packages is stored on [https://file.io](https://file.io) for the time specified in `expiry`, or until the URL is first accessed. -#' Note that either visiting the URL in a browser or sourcing the URL via R count as access, and it will be removed after either. -#' If `output` is `local`, an R script file (`.R`) is saved to the current working directory, which can be transferred manually to another machine. +#' @details If `output` is `online`, the resulting list of currently installed packages +#' is stored on [https://file.io](https://file.io) for the time specified in `expiry`, +#' or until the URL is first accessed. Note that either visiting the URL in a browser +#' or sourcing the URL via R count as access, and it will be removed after either. #' -#' @return Prints instructions to console if `quiet = FALSE`, and invisibly returns the source command to use on the other machine if quiet is `TRUE`. +#' If `output` is `local`, an R script file (`.R`) is saved to the current working +#' directory, which can be transferred manually to another machine. +#' +#' If `output` is `gist`, the script is uploaded as a GitHub gist (requires the +#' `gistr` package). +#' +#' @return Invisibly returns the command to run on the target machine. If `quiet = FALSE`, +#' also prints instructions to console. #' @export #' transfer_packages <- function(library = .libPaths()[1], output = "online", expiry = "7d", filename = "transfer_packages", list_remotes = TRUE, quiet = FALSE) { @@ -30,13 +43,13 @@ Sys.sleep(5) if(list_remotes & nrow(gh)>0) { to_install <- glue::glue_col("{to_install} message('These pacakges were installed from a repository other than CRAN, and have not been reinstalled: {blue {glue::glue_collapse(gh$repo, sep = ', ', last = ' and ')}}\n', fill = T)") - } + } green <- crayon::green blue <- crayon::blue if (output == "online") { - r <- httr::POST(glue::glue("https://file.io/?expires={expiry}"), body = list(text = to_install)) + r <- httr::POST(glue::glue("https://file.io/?expires={expiry}"), body = list(file = to_install)) link <- httr::content(r)$link cmd <- glue::glue("source('{link}')") message(glue::glue_col("Now run {green {cmd}} on the other machine to install the packages. @@ -44,10 +57,10 @@ Sys.sleep(5) } else if (output == "local") { write(to_install, file = glue::glue("{filename}.R")) - if (!quiet) { + if (!quiet) { cmd <- glue::glue("source('{filename}.R'") message(glue::glue_col("Now copy the file {filename}.R to the other machine and run {green {cmd}} to install the packages.")) - } + } } else if (output == "gist") { if(requireNamespace("gistr", quietly = T)) { @@ -55,7 +68,7 @@ Sys.sleep(5) } else { stop("The gistr package is not available. Please install it.") - } + } # This should only be used by advanced users, do you want to continue? Y/n? # Public or private? } @@ -73,7 +86,7 @@ Sys.sleep(5) output <- paste0("'", gh[which(gh$type == "github"), 2], "/", gh[which(gh$type == "github"), 3], "'", collapse = ", ") message(glue::glue_col("To install those from github, run: {blue remotes::install_github(c({output}))}")) - } + } } } invisible(cmd) From fd2bbc8648d1f09d55669912cf07e0de353882fb Mon Sep 17 00:00:00 2001 From: Sam Rogers <7007561+rogerssam@users.noreply.github.com> Date: Mon, 16 Feb 2026 12:33:56 +1030 Subject: [PATCH 2/8] Adding latest updates --- R/BiometryTools_master_file.R | 1055 +++++++++++++++++++++++++++++++++ 1 file changed, 1055 insertions(+) create mode 100644 R/BiometryTools_master_file.R diff --git a/R/BiometryTools_master_file.R b/R/BiometryTools_master_file.R new file mode 100644 index 0000000..606df91 --- /dev/null +++ b/R/BiometryTools_master_file.R @@ -0,0 +1,1055 @@ + +## Conversion function for Efficiency and Responsiveness BLUPs in +## Treatment x Site x Variety experiments + +## The function assumes you have a Treatment x Site factor that is a composite of +## treatments and sites. The function requires no specific ordering of the factor levels. + +## Arguments: +## model: Final full Treatment x Site x Variety model +## Env: Treatment x Site x Variety term +## levs: Named treatment levels used in transformation. i.e c("Treat1", "Treat2") +## would regress Treat2 on Treat1 +## sep: separator used for Treat x Site names (if multi-x model), if not present assumes single section + + +randomRegress <- function(model, Env = "TSite:Variety", levs = NULL, sep = "-", pev = TRUE, ...){ + if(is.null(levs)) + stop("Treatment levels cannnot be NULL.") + evnam <- unlist(strsplit(Env, ":")) + enam <- evnam[1]; vnam <- evnam[2] + penv <- gsub(":",".*", Env) + rterm <- attr(terms.formula(model$call$random), "term.labels") + rterm <- rterm[grep(penv, rterm)] + print(rterm) + if(substring(rterm, 1, 2) == "fa"){ + sumfa <- fa.asreml(model, trunc.char = NULL) + pvals <- sumfa$blups[[rterm]]$blups[,1:3] + Gmat <- sumfa$gammas[[rterm]]$Gmat + } + else { + pred <- predict(model, classify = Env, only = Env, vcov = TRUE, ...) + Gmat <- summary(model, vparameters = TRUE)$vparameters[[Env]] + pvals <- pred$pvals + names(pvals)[3] <- "blup" + } + tsnams <- dimnames(Gmat)[[2]] + if(length(grep(sep, tsnams))){ + st <- strsplit(tsnams, split = sep) + tnam <- sapply(st, function(el) el[1]) + snam <- sapply(st, function(el) el[2]) + if(!all(levs %in% c(snam, tnam))) + stop("Treatment levels do not exist in ", enam) + if(all(levs %in% snam)){ + tnam <- snam + snam <- sapply(st, function(el) el[1]) + } + } else { + tnam <- tsnams + snam <- rep("Single", length(tnam)) + } + usnams <- unique(snam) + tmat <- diag(nrow(Gmat)) + beta <- sigr <- c() + blist <- list() + for(i in 1:length(usnams)){ + inds <- (1:length(snam))[snam %in% usnams[i]] + names(inds) <- tnam[inds] + whl <- (1:2)[levs %in% names(inds)] + if(length(whl) == 2){ + tind <- inds[levs] + mat <- Gmat[tind, tind] + beta[i] <- mat[1,2]/mat[1,1] + rho <- mat[1,2]/sqrt(mat[1,1]*mat[2,2]) + sigr[i] <- (mat[2,2]*(1 - rho^2)) + tmat[tind[2], tind[1]] <- - beta[i] + imat <- diag(2) + imat[2,1] <- - beta[i] + blow <- pvals$blup[pvals[[enam]] %in% tsnams[tind[1]]] + bhigh <- pvals$blup[pvals[[enam]] %in% tsnams[tind[2]]] + bresp <- bhigh - beta[i]*blow + blist[[i]] <- cbind.data.frame(blow, bhigh, bresp) + lowi <- (1:nrow(pvals))[pvals[[enam]] %in% tsnams[tind[1]]] + highi <- (1:nrow(pvals))[pvals[[enam]] %in% tsnams[tind[2]]] + pevm <- kronecker(imat, diag(length(blow))) %*% as.matrix(pred$vcov[c(lowi,highi),c(lowi,highi)]) %*% kronecker(t(imat), diag(length(blow))) + if(!pev) + pevm <- kronecker(diag(c(mat[1,1],sigr[i])), diag(length(blow))) - pevm + bvar <- pevm[(length(blow) + 1):ncol(pevm),(length(blow) + 1):ncol(pevm)] + sed <- apply(combn(diag(bvar), 2), 2, sum) - 2*bvar[lower.tri(bvar)] + sed[sed < 0] <- NA + blist[[i]]$HSD <- (mean(sqrt(sed), na.rm = TRUE)/sqrt(2))*qtukey(0.95, length(blow), df = length(blow) - 2) + } else { + slevs <- levs[whl] + tind <- inds[slevs] + if(whl == 1) + blist[[i]] <- cbind.data.frame(blow = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bhigh = NA, bresp = NA) + else blist[[i]] <- cbind.data.frame(blow = NA, bhigh = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bresp = NA) + } + } + TGmat <- tmat %*% Gmat %*% t(tmat) + tsnams <- gsub(levs[2], "resp", tsnams) + tsnams <- gsub(levs[1], "eff", tsnams) + dimnames(TGmat) <- list(tsnams, tsnams) + blups <- do.call("rbind.data.frame", blist) + names(blups)[1:3] <- c(levs, "resp") + glev <- unique(as.character(pvals[[vnam]])) + blups <- cbind.data.frame(Site = rep(usnams, each = length(glev)), Variety = rep(glev, length(usnams)), blups) + list(blups = blups, TGmat = TGmat, Gmat = Gmat, beta = beta, sigr = sigr, tmat = tmat) +} + +## BLUEs regression + +fixedRegress <- function(model, term = "Treatment:Genotype", by = NULL, levs = NULL, simple = TRUE){ + pterm <- term + if(is.null(levs)) + stop("Treatment levels cannnot be NULL.") + term <- unlist(strsplit(term, ":")) + if(length(term) < 2) + stop("Argument \"term\" needs at least two variables.") + pred <- predict(model, classify = pterm, vcov = TRUE) + whna <- !is.na(pred$pvals$predicted.value) + pv <- pred$pvals[whna,] + vc <- as.matrix(pred$vcov)[whna,whna] + wht <- unlist(sapply(pv[,term], function(el, levs) all(levs %in% levels(el)), levs)) + if(!any(wht)) + stop("Some levels specified in \"levs\" do not exist in term variables.") + tnam <- term[wht] + if(!is.null(by)){ + bys <- unlist(strsplit(by, ":")) + if(!all(bys %in% term)) + stop("Some variables in argument \"by\" are not in \"term\".") + if(tnam %in% bys) + stop("Levels specified in \"levs\" cannot be in \"by\" variable.") + rterm <- term[!(term %in% c(tnam, bys))] + if(!length(rterm)) + stop("There are no variables to form regression between specified levels.") + if(length(rterm) > 1) + pv[["regress"]] <- apply(pv[,rterm], 1, function(el) paste(el, collapse = ":")) + else pv[["regress"]] <- pv[[rterm]] + if(length(bys) > 1) + pv[[by]] <- apply(pv[,bys], 1, function(el) paste(el, collapse = ":")) + uby <- as.character(pv[[by]]) + um <- unique(uby) + } else { + uby <- rep(tnam, nrow(pv)) + um <- unique(uby) + rterm <- term[!(term %in% tnam)] + pv[["regress"]] <- pv[[rterm]] + } + resp.list <- list() + for(i in 1:length(um)){ + inds <- uby %in% um[i] + pvt <- pv[inds,] + pv1 <- pvt[wh1 <- pvt[[tnam]] %in% levs[1],] + pv2 <- pvt[wh2 <- pvt[[tnam]] %in% levs[2],] + whr <- intersect(pv1[["regress"]], pv2[["regress"]]) + if(length(whr) > 5){ + wt1 <- pv1[["regress"]] %in% whr + wt2 <- pv2[["regress"]] %in% whr + pcont <- pv1$predicted.value[wt1] + ptreat <- pv2$predicted.value[wt2] + regt <- pv2[["regress"]][wt2] + if(!simple){ + vct <- vc[inds, inds] + s22 <- vct[wh2,wh2] + s22 <- s22[wt2,wt2] + s11 <- vct[wh1,wh1] + s11 <- s11[wt1,wt1] + s21 <- vct[wh2,wh1] + s21 <- s21[wt2,wt1] + resp <- ptreat - s21 %*% solve(s11) %*% pcont + resp.var <- s22 - s21 %*% solve(s11) %*% t(s21) + rdf <- model$nedf + } else { + lmr <- lm(ptreat ~ pcont) + resp <- lmr$residuals + xm <- model.matrix( ~ pcont) + vmat <- (diag(length(ptreat)) - xm %*% solve(t(xm)%*%xm) %*% t(xm)) + resp.var <- ((vmat) %*% t(vmat))*(summary(lmr)$sigma^2) + rdf <- lmr$df.residual + } + std.error <- sqrt(diag(resp.var)) + sed <- sqrt(apply(combn(diag(resp.var), 2), 2, sum) - 2*resp.var[lower.tri(resp.var)]) + respd <- cbind.data.frame(Split = um[i], Regress.Var = regt) + respd[[levs[1]]] <- pcont + respd[[levs[2]]] <- ptreat + respd$reponse.index <- resp + respd$std.error <- std.error + respd$HSD <- (mean(sed)/sqrt(2))*qtukey(0.95, length(ptreat), df = rdf) + respd$sed <- mean(sed) + resp.list[[i]] <- respd + } else warning("Some treatment combinations in ", um[i]," have less than 5 matching observations and have been omitted.\n") + } + resp.list <- resp.list[!sapply(resp.list, is.null)] + do.call("rbind.data.frame", resp.list) +} + +compare <- function(model, term = "Treatment:Genotype", by = NULL, omit.string = NULL, type = "HSD", pev = TRUE, fw.method = "none", ...){ + pred <- predict(model, classify = term, vcov = TRUE, ...) + terms <- unlist(strsplit(term, ":")) + pv <- pred$pvals + inds <- !is.na(pv$predicted.value) + if(!pev & all(terms %in% all.vars(model$call$random))){ + varm <- summary(model, vparameters = TRUE)$vparameters[[term]] + if(length(terms) > 1) + len <- table(pv[,1])[1] + else len <- nrow(pv) + vara <- kronecker(varm, diag(len)) - pred$vcov + vara[inds, inds] + } else vara <- pred$vcov[inds, inds] + pv <- pv[inds,] + section <- FALSE + if(!is.null(by)){ + bys <- unlist(strsplit(by, ":")) + if(all(terms %in% bys)) + stop("Argument \"by\" indicates no multiple comparisons are being made.") + if(!all(bys %in% terms)) + stop("Some terms in argument \"by\" are not in \"term\".") + if(length(bys) > 1) + pv[[by]] <- apply(pv[,bys], 1, function(el) paste(el, collapse = ":")) + } else{ + by <- term + pv[[by]] <- by + } + if(!is.null(omit.string)){ + oind <- grep(omit.string, as.character(pv[[gnam]])) + if(length(oind)){ + pv <- pv[-oind,] + sed <- sed[-oind,-oind] + } + } + sst <- as.character(pv[[by]]) + um <- unique(sst) + if(type %in% c("HSD","LSD")){ + tsd <- avsed <- c() + for(k in 1:length(um)){ + sinds <- sst %in% um[k] + svar <- vara[sinds, sinds] + avsed[k] <- sqrt(mean(apply(combn(diag(svar), 2), 2, sum) - 2*svar[lower.tri(svar)])) + if(type == "HSD") + tsd[k] <- (avsed[k]/sqrt(2))*qtukey(0.95, length(sinds), model$nedf) + else tsd[k] <- avsed[k]*qt(0.025, df = model$nedf, lower.tail = FALSE) + } + pv <- cbind.data.frame(pv[,1:(length(terms) + 2)]) + pv[[type]] <- rep(tsd, times = table(sst)) + pv[["sed"]] <- rep(avsed, times = table(sst)) + } + else if(type %in% "PVAL"){ + pvs <- split(pv, pv[[by]]) + yvar <- deparse(model$call$fixed[[2]]) + xvar <- labels(terms(as.formula(model$call$fixed))) + fix.form <- as.formula(paste(yvar, " ~ ", xvar[length(xvar)], " - 1", sep = "")) + model <- update(model, fixed. = fix.form, Cfixed = TRUE) + coefs <- model$coefficients$fixed + cinds <- grep(paste(terms, collapse = ".*"), rownames(coefs)) + coefs <- coefs[cinds,,drop = FALSE] + for(k in 1:length(um)){ + umt <- paste(strsplit(um[k], ":")[[1]], collapse = ".*") + sind <- cinds[grep(umt, rownames(coefs))] + scf <- coefs[grep(umt, rownames(coefs)),] + sna <- scf == 0 + aind <- sind[!sna] + pvt <- pvs[[k]] + cb <- t(combn(nrow(pvt), 2)) + mat <- matrix(0, nrow = nrow(cb), ncol = nrow(pvt)) + mat[cbind(1:nrow(mat), cb[,1])] <- 1 + mat[cbind(1:nrow(mat), cb[,2])] <- -1 + cc <- list(coef = aind, type = "con", comp = mat) + wt <- waldTest(model, list(cc))$Contrasts + pval <- wt$"P-Value" + add <- matrix(0, nrow = nrow(pvt), ncol = nrow(pvt)) + add[lower.tri(add)] <- stats::p.adjust(pval, method = fw.method) + add <- add + t(add) +# add <- add[ord, ord] + dimnames(add)[[2]] <- apply(pvt[,terms], 1, function(el) paste(el, collapse = ":")) + paste(as.character(pvt[[terms[[1]]]]), as.character(pvt[[terms[2]]]), sep = ":") + pvs[[k]] <- cbind.data.frame(pvs[[k]][,1:(length(terms) + 2)], add) + } + pv <- pvs + } else stop("Please use one of the allowable types, \"HSD\",\"LSD\",\"PVAL\"") + pv +} + +## associate BLUEs with BLUPs form predict calls of the same model + +associate <- function(model, ran.term = "Site:Genotype", fix.term = "Site:Type", ...){ + rnams <- all.vars(as.formula(paste("~ ", ran.term, sep = ""))) + fnams <- all.vars(as.formula(paste("~ ", fix.term, sep = ""))) + if(length(rnams) > 1){ + labs <- attr(terms(as.formula(model$call$random)), "term.labels") + iterm <- labs[grep(paste(rnams[1], "*.*", rnams[2], sep = ""), labs)] + uv <- sapply(strsplit(iterm, "\\("), "[", 1) + if(uv == "fa") + predr <- predict(model, classify = ran.term, only = iterm, ...) + else if(uv %in% c("diag","corh","corgh","us")) + predr <- predict(model, classify = ran.term, only = ran.term, ...) + tab <- table(c(rnams, fnams)) + dnams <- names(tab)[tab == 1] + snams <- names(tab)[tab == 2] + rnams <- rnams[rnams != snams] + } else { + predr <- predict(model, classify = ran.term, only = ran.term, ...) + dnams <- c(rnams,fnams) + } + pr <- predr$pvals + names(pr) <- gsub("predicted.value", "blups", names(pr)) + names(pr) <- gsub("std.error", "blups.std.error", names(pr)) + predf <- predict(model, classify = fix.term, ...) + pf <- predf$pvals + pf <- pf[!is.na(pf$predicted.value),] + names(pf) <- gsub("predicted.value", "blues", names(pf)) + names(pf) <- gsub("std.error", "blues.std.error", names(pf)) + dat <- eval(model$call$data) + datr <- dat[,dnams] + datr <- datr[!duplicated(datr),] + pri <- merge(pr[-ncol(pr)], datr, by = rnams, all.x = TRUE) + pall <- merge(pri, pf[-c(ncol(pf) - 1, ncol(pf))], by = fnams, all.x = TRUE) + if(any(is.na(pall$blues))){ + if(length(rnams) > 1) + spv <- pall[[snams]] + else spv <- factor(rep(1, nrow(pall))) + palls <- split(pall, spv) + pall <- do.call("rbind", lapply(palls, function(el){ + nab <- is.na(el$blues) + el$Type.blue <- as.character(el$Type) + el$Type.blue[nab] <- "Site Average" + el$blues[nab] <- mean(el$blues[!nab]) + el})) + } + pall +} + +## prune function from pedicure + +prune <- function(ped, data, gen = NULL) +{ + if(is.na(which <- match(names(ped)[1],names(data)))) + stop(paste("Cannot find",names(ped)[1],"in data")) + if(any(is.na(match(as.character(data[[which]]), as.character(ped[,1]))))) + warning("There are individuals in 'data' that are absent in 'ped'") + + data <- as.numeric(!is.na(match(as.character(ped[,1]), + as.character(data[[which]])))) + mmd <- data.frame(id = 1:nrow(ped), + dam = match(ped[,2],ped[,1],nomatch = 0), + sire = match(ped[,3],ped[,1],nomatch = 0)) + print(mmd) + if(is.null(gen)) { + gen <- max(countGen(mmd)) + } + what <- pedigree::trimPed(mmd, data, gen) + return(ped[what,]) +} + +## heritability for multi/single environment trials + +herit.asreml <- function(model, term = "SYear:Genotype", ...){ + dat <- eval(model$call$data) + if(length(grep(":", term))){ + terms <- all.vars(as.formula(paste("~ ", term, sep = ""))) + labs <- attr(terms(as.formula(model$call$random)), "term.labels") + iterm <- labs[grep(paste(terms[1], "*.*", terms[2], sep = ""), labs)] + uv <- sapply(strsplit(iterm, "\\("), "[", 1) + if(uv == "fa"){ + pred <- predict(model, classify = term, only = iterm, sed = TRUE, ...) + sumfa <- fa.asreml(model, trunc.char = NULL) + gam <- diag(sumfa$gammas[[grep(paste(terms[1], "*.*", terms[2], sep = ""), names(sumfa$gammas))]]$Gmat) + } else if(uv %in% c("diag","corh","corgh","us")){ + pred <- predict(model, classify = term, only = term, sed = TRUE, ...) + if(uv %in% c("diag")) + gam <- summary(model, vparameters = TRUE)$vparameters[[term]] + else + gam <- diag(summary(model, vparameters = TRUE)$vparameters[[term]]) + } + else stop("The function does not understand this asreml function.") + site <- pred$pvals[[terms[1]]] + levs <- levels(site) + avsed <- c() + for(i in 1:length(levs)){ + inds <- (1:length(site))[as.character(site) %in% levs[i]] + sedm <- pred$sed[inds, inds] + sedm <- sedm[upper.tri(sedm)] + avsed[i] <- mean(sedm) + } + } else { + pred <- predict(model, classify = term, only = term, sed = TRUE, ...) + con <- model$vparameters.con[grep("units\\!R", names(model$vparameters.con))] + if(length(con) == 0 || (con != 4)) + gam <- model$vparameters[grep(term, names(model$vparameters))]*model$sigma2 + else gam <- model$vparameters[grep(term, names(model$vparameters))] + avsed <- pred$avsed[2] + levs <- term + } + h2 <- 1 - (avsed^2)/(2*gam) + names(h2) <- levs + h2 +} + +## outlier addition function + +outlier.down <- function(data, model, cutoff = 3){ + ss <- names(model) + inds <- 1:nrow(data) + for(i in 1:length(ss)){ + str <- abs(model[[ss[i]]]$aom$R[,2]) + r <- str > cutoff + wh <- inds[r] + wh <- wh[!is.na(wh)] + if(length(wh)){ + ps <- paste(ss[i], "o", sep = ".") + num <- 0 + if(length(wt <- grep(ps, names(data)))){ + num <- sapply(strsplit(names(data)[wt], "\\."), function(el) el[length(el)]) + num <- as.numeric(num[length(num)]) + } + print(wh) + for(j in 1:length(wh)){ + nam <- paste(ps, j + num, sep = ".") + v <- rep(0, nrow(data)) + v[wh[j]] <- 1 + data[[nam]] <- v + } + } + } + data +} + +## outlier removal function + +outlier.rem <- function(data, model, cutoff = 3){ + ss <- names(model) + inds <- 1:nrow(data) + out <- rep(FALSE, length(model)) + names(out) <- ss + for(i in 1:length(ss)){ + trait <- data[[ss[i]]] + str <- abs(model[[ss[i]]]$aom$R[,2]) + r <- str > cutoff + wh <- inds[r] + wh <- wh[!is.na(wh)] + if(length(wh)){ + print(wh) + data[[ss[i]]][wh] <- NA + out[i] <- TRUE + } + } + list(data = data, out = out) +} + +pad.data <- function(data, pattern = "Row:Column", split = "Block", keep = 4, fill = NULL){ + pat <- unlist(strsplit(pattern, ":")) + if(!(split %in% names(data))) + stop("split argument not in data") + if(!all(pat %in% names(data))) + stop("One or more of the variables in pattern argument not in data") + spd <- split(data, data[[split]]) + spd <- lapply(spd, function(el, pat){ + temp <- el + temp <- cbind.data.frame(lapply(temp, function(el){ if(is.factor(el)) factor(el) else el})) + temp$add <- "old" + tabs <- table(temp[[pat[1]]], temp[[pat[2]]]) + wh <- which(tabs == 0, arr.ind = TRUE) + if(dim(wh)[1] > 0){ + tp <- temp[1:nrow(wh),] + tp <- cbind.data.frame(lapply(tp, function(el) rep(NA, length(el)))) + tp[,keep] <- temp[1:nrow(wh),keep] + tp[[pat[1]]] <- factor(rownames(tabs)[wh[,1]]) + tp[[pat[2]]] <- factor(colnames(tabs)[wh[,2]]) + if(!is.null(fill)) + tp[,fill] <- NA + tp$add <- "new" + temp <- rbind.data.frame(temp, tp) + } + temp + }, pat) + ad <- do.call("rbind.data.frame", spd) + ad[[pat[1]]] <- factor(ad[[pat[1]]], levels = as.character(sort(as.numeric(levels(ad[[pat[1]]]))))) + ad[[pat[2]]] <- factor(ad[[pat[2]]], levels = as.character(sort(as.numeric(levels(ad[[pat[2]]]))))) + ad[order(ad[[pat[1]]],ad[[pat[2]]]),] +} + +extract <- function(data, pattern = "Row:Column", match = "DH", split = "Block", pad = TRUE, keep = 4, fill = NULL){ + pat <- unlist(strsplit(pattern, ":")) + if(!(split %in% names(data))) + stop("split argument not in data") + if(!all(pat %in% names(data))) + stop("One or more of the variables in pattern argument not in data") + spd <- split(data, data[[split]]) + spd <- lapply(spd, function(el, match, pat, pad){ + temp <- el[as.character(el$Type) %in% match,] + print(dim(temp)) + rr <- range(as.numeric(as.character(temp[,pat[1]]))) + rc <- range(as.numeric(as.character(temp[,pat[2]]))) + print(rr) + print(rc) + elr <- (1:nrow(el))[el[[pat[1]]] %in% as.character(rr[1]:rr[2])] + elc <- (1:nrow(el))[el[[pat[2]]] %in% as.character(rc[1]:rc[2])] + ela <- intersect(elr, elc) + temp <- el[ela[order(ela)],] + temp <- cbind.data.frame(lapply(temp, function(el){ if(is.factor(el)) factor(el) else el})) + if(pad){ + temp$add <- "old" + tabs <- table(temp[[pat[1]]], temp[[pat[2]]]) + wh <- which(tabs == 0, arr.ind = TRUE) + if(length(wh)){ + whn <- pmatch(pat, names(temp)) + tp <- temp[1:nrow(wh),] + tp <- cbind.data.frame(lapply(tp, function(el) rep(NA, length(el)))) + tp[,keep] <- temp[1:nrow(wh),keep] + tp[[pat[1]]] <- factor(rownames(tabs)[wh[,1]]) + tp[[pat[2]]] <- factor(colnames(tabs)[wh[,2]]) + if(!is.null(fill)) + tp[,fill] <- "Blank" + tp$add <- "new" + temp <- rbind.data.frame(temp, tp) + } + } + temp + }, match, pat, pad) + ad <- do.call("rbind.data.frame", spd) + ad[[pat[1]]] <- factor(ad[[pat[1]]], levels = as.character(sort(as.numeric(levels(ad[[pat[1]]]))))) + ad[[pat[2]]] <- factor(ad[[pat[2]]], levels = as.character(sort(as.numeric(levels(ad[[pat[2]]]))))) + ad[order(ad[[pat[1]]],ad[[pat[2]]]),] +} + +## fine map a wgaim object + +fineMap <- function(model, intervalObj, mark = NULL, flanking = 50, exclusion.window = 10000, ...){ + resp <- deparse(model$call$fixed[[2]]) + phenoData <- eval(parse(text = paste(resp, ".data", sep = ""))) + if (missing(intervalObj)) + stop("intervalObj is a required argument") + if (!inherits(intervalObj, "cross")) + stop("intervalObj is not of class \"cross\"") + if(is.null(mark)) + stop("mark argument must be non-NULL.") + if (model$QTL$type == "interval") + gdat <- lapply(intervalObj$geno, function(el) el$interval.data) + else gdat <- lapply(intervalObj$geno, function(el) el$imputed.data) + genoData <- do.call("cbind", gdat) + gterm <- model$QTL$diag$genetic.term + state <- model$QTL$diag$state + method <- model$QTL$method + dimnames(genoData) <- list(as.character(intervalObj$pheno[[gterm]]), names(state)) + genoData <- genoData[rownames(genoData) %in% as.character(phenoData[[gterm]]),] + fm <- find.markerpos(intervalObj, mark) + chrs <- sapply(strsplit(names(state), "\\."), "[", 2) + chr.ind <- chrs %in% fm$chr + state.chri <- state[chrs %in% fm$chr] + mapc <- pull.map(intervalObj, fm$chr)[[1]] + qind <- (1:length(mapc))[names(mapc) %in% mark] + mark.qtl <- gsub("Chr\\.", "X.", names(state.chri)[qind]) + ql <- ifelse(qind - flanking <= 0, 1, qind - flanking) + qr <- ifelse(qind + flanking > length(mapc), length(mapc), qind + flanking) + state.chri[ql:qr] <- 1 + genoChr <- genoData[,names(state.chri)[ql:qr]] + colnames(genoChr) <- gsub("Chr\\.", "X.", colnames(genoChr)) + tmp <- cbind.data.frame(rownames(genoData), genoChr) + colnames(tmp)[1] <- gterm + phenoData <- phenoData[,!(names(phenoData) %in% mark.qtl)] + phenoData$ord <- 1:nrow(phenoData) + phenoData <- merge(phenoData, tmp, by = gterm, all.x = TRUE) + phenoData <- phenoData[order(phenoData$ord),] + k <- 1 + pvalue <- lod <- c() + for(i in ql:qr){ + wind <- abs(mapc[i] - mapc) <= exclusion.window + state.chr <- state.chri + state.chr[wind] <- 0 + state[chr.ind] <- state.chr + mout <- (1:ncol(genoData))[!as.logical(state)] + genoSub <- genoData[,-mout] + if(ncol(genoSub) > nrow(genoSub)){ + cov.env <- wgaim:::constructCM(genoSub) + covObj <- cov.env$relm + } else { + tempObj <- cbind.data.frame(covObj[,1], genoSub) + names(tempObj)[1] <- names(covObj)[1] + covObj <- tempObj + } + assign("covObj", covObj, envir = parent.frame()) + mark.i <- colnames(genoChr)[k] + print(mark.i) + if(method == "random"){ + temp.form <- update.formula(model$call$random, as.formula(paste("~ . - ", mark.qtl, sep = ""))) + temp.form <- update.formula(temp.form, as.formula(paste("~ . + ", mark.i, sep = ""))) + tempmodel <- wgaim:::vModify(model, gterm) + tempmodel <- update.asreml(tempmodel, random. = temp.form, data = phenoData, ...) + } + else { + fix.form <- formula(paste(". ~ . +", mark.i, "-", mark.qtl, sep = "")) + tempmodel <- update.asreml(model, fixed. = fix.form, data = phenoData, ...) + } + cf <- tempmodel$coefficients[[method]] + whr <- grep(mark.i, rownames(cf)) + mcf <- tempmodel$coefficients[[method]][whr, 1] + vcf <- tempmodel$vcoeff[[method]][whr] + zrat <- mcf/sqrt(vcf * tempmodel$sigma2) + #zrat <- mcf/sqrt(vcf) + pvalue[k] <- round((1 - pchisq(zrat^2, df = 1)), 4) + lod[k] <- round(0.5 * log(exp(zrat^2), base = 10), 4) + print(c(pvalue[k], lod[k])) + k <- k + 1 + } + cbind.data.frame(mark = names(mapc)[ql:qr], dist = mapc[ql:qr], pvalue = pvalue, LOD = lod) +} + + +## manhattan plot using ggplot + +manhattan <- function(mlist, cross, chr.in = NULL, annotate = TRUE, ...){ + nams <- names(mlist) + outs <- lapply(mlist, function(el){ + temp <- el$QTL$diag$oint[[1]] + names(temp) <- gsub("Chr\\.","", names(temp)) + temp + }) + if(!is.null(chr.in)){ + cross <- subset(cross, chr = chr.in) + for(i in 1:length(outs)){ + onam <- sapply(strsplit(names(outs[[i]]), "\\."), "[", 1) + outs[[i]] <- outs[[i]][onam %in% chr.in] + } + } + dat <- cbind.data.frame(value = unlist(outs)) + dat$nout <- sapply(outs, names) + len <- sapply(outs, length)[1] + dat$Name <- rep(nams, each = len) + chr <- rep(names(cross$geno), times = nmar(cross)) + dat$chr <- factor(rep(chr, length(nams))) + dist <- lapply(cross$geno, function(el) { + tel <- c(100000, diff(el$map)) + names(tel)[1] <- names(el$map)[1] + tel + }) + dist <- cumsum(unlist(dist)) + sp <- unlist(lapply(split(dist, chr), function(el) min(el) + diff(range(el))/2)) + spc <- unlist(lapply(split(dist, chr), function(el) max(el) + 500000)) + dat$dist <- rep(dist, length(nams)) + dat$chr.g <- dat$chr + levels(dat$chr.g) <- c(rep(c("g1","g2"), 10), "g1") + cols <- brewer.pal(3, "Set1")[1:2] + gp <- ggplot(dat, aes(x = dist, y = value)) + facet_wrap(~ Name, ncol = 1, scales = "free_y") + + geom_vline(xintercept = spc, colour = "gray80") + + geom_point(aes(colour = chr.g)) + + scale_y_continuous(expand = c(0.02,0), breaks = seq(0,100, by = 10)) + + scale_x_continuous(breaks = sp, labels = names(cross$geno), expand = c(0.02,0)) + + xlab("") + ylab("Outlier Statistic") + scale_color_manual(values = cols) + + theme(legend.position = "none",axis.text = element_text(size = 10), panel.background = element_blank(), panel.border = element_rect(colour = "gray80", fill = NA, size = 1.1), panel.grid.major.y = element_line(colour = "gray90", size = 1.2), panel.grid.minor.y = element_line(colour = "gray90", size = 0.8), panel.grid.major.x = element_blank(), axis.title = element_text(size = 20), strip.text = element_text(size=10)) + if(annotate){ + qtl <- lapply(mlist, function(el) gsub("Chr\\.", "", el$QTL$qtl)) + qtl.dat <- cbind.data.frame(Name = rep(nams, times = sapply(qtl, length))) + qtl.dat$nout <- unlist(qtl) + if(!is.null(chr.in)){ + qnam <- sapply(strsplit(as.character(qtl.dat$nout), "\\."), "[", 1) + print(qnam) + qtl.dat <- qtl.dat[qnam %in% chr.in,] + } + print(qtl.dat) + subexpr <- paste(dat$Name, dat$nout, sep = ":") %in% paste(qtl.dat$Name, qtl.dat$nout, sep = ":") + ann <- subset(dat, subexpr) + print(ann) + gp + geom_text(data = ann, aes(label = nout), size = 5) + } +} + +## check whether genetic clones are in the experiment + +phenClones <- function(model, cross, matching = "Genotype", Envir = NULL, no.samp = 1000, sep = "_"){ + mg <- as.character(cross$pheno[[matching]]) + mgs <- lapply(mg, function(el, sep){ + el <- unlist(strsplit(el, sep)) + if(length(el) > 1) + t(combn(el, 2)) + else NULL + }, sep = sep) + mgs <- mgs[!sapply(mgs, is.null)] + mgd <- do.call("rbind", mgs) + if(!is.null(Envir)){ + iterm <- paste(Envir, matching, sep = ":") + pvals <- predict(model, classify = iterm, only = iterm)$pvals + pvlist <- split(pvals, pvals[[Envir]]) + levs <- levels(pvals[[Envir]]) + } else + pvlist <- list(predict(model, classify = matching, only = matching)$predictions$pvals) + corlist <- list() + for(j in 1:length(pvlist)){ + pvt <- pvlist[[j]] + cg1 <- pvt$predicted.value[pmatch(mgd[,1], pvt[[matching]], duplicates.ok = TRUE)] + cg2 <- pvt$predicted.value[pmatch(mgd[,2], pvt[[matching]], duplicates.ok = TRUE)] + cor.samp <- c() + for(i in 1:no.samp) { + ts <- sample(pvt$predicted.value, dim(mgd)*2, replace = FALSE) + cor.samp[i] <- cor(ts[1:(length(ts)/2)], ts[(length(ts)/2 + 1):length(ts)]) + } + df <- dim(mgd)[1] - 2 + cs <- c(cor.samp, cor(cg1, cg2, use = "complete.obs")) + pv <- 1 - pf((cs^2)*df/(1 - cs^2), 1, df) + pva <- pv[1:(length(pv) - 1)] + corlist[[j]] <- c(length(pva[pva < 0.05])/no.samp, cs[length(cs)], pv[length(pv)]) + } + res <- cbind.data.frame(t(do.call("cbind", corlist))) + names(res) <- c("Type1","Correlation","P-value") + if(!is.null(Envir)) + res <- cbind.data.frame(levs, res) + res +} + +## fix clones if they are in the experiment + +phenfixClones <- function(data, cross, matching = "Genotype", sep = "_"){ + mg <- as.character(cross$pheno[[matching]]) + mgs <- lapply(mg, function(el, sep){ + el <- unlist(strsplit(el, sep)) + if(length(el) > 1) + el + else NULL + }, sep = sep) + mgs <- mgs[!sapply(mgs, is.null)] + for(i in 1:length(mgs)){ + levs <- levels(data[[matching]]) + levels(data[[matching]])[levs %in% mgs[[i]]] <- paste(mgs[[i]], collapse = sep) + } + data +} + +## FAST: overall performance and stability for intrepreting Factor Analytic models + +fast <- function(model, term = "fa(Site, 4):Genotype", ...){ + dat <- eval(model$call$data) + str <- strsplit(term, ":")[[1]] + sterm <- sapply(strsplit(gsub("fa\\(|\\))", "", str[grep("fa", str)]), ","), "[", 1) + gterm <- str[-grep("fa", str)] + sfa <- fa.asreml(model, ...) + scores <- sfa$blups[[term]]$scores + lvar <- cbind.data.frame(sfa$gammas[[term]]$"rotated loads", sfa$gammas[[term]]$"specific var") + scores <- do.call("cbind.data.frame", tapply(scores$blupr, scores[[sterm]], function(el) el)) + names(scores) <- ns <- paste("score", 1:ncol(scores), sep = "") + nk <- dim(scores)[2] + scores <- cbind.data.frame(levels(dat[[gterm]]), scores) + names(scores)[1] <- gterm + sa <- scores[rep(1:nrow(scores), nrow(lvar)),] + lvar <- lvar[rep(1:nrow(lvar), each = nrow(scores)),] + nl <- paste("loads", 1:(ncol(lvar) - 1), sep = "") + names(lvar) <- c(nl, "spec.var") + ls <- cbind.data.frame(rep(levels(dat[[sterm]]), each = nrow(scores)), lvar, sa) + names(ls)[1] <- sterm + for(i in 1:nk){ + ts <- paste("fitted", i, sep = "") + ls[[ts]] <- ls[[ns[i]]]*ls[[nl[[i]]]] + } + print(names(ls)) + ls$CVE <- rowSums(ls[,grep("fitted", names(ls)), drop = FALSE]) + ls$VE <- ls$CVE + ls[,"spec.var"] + ls$OP <- mean(ls$loads1)*ls$score1 + if(nk > 1){ + ls$dev <- ls$CVE - ls$fitted1 + ls$stab <- sqrt(tapply(ls$dev^2, ls[[gterm]], mean)[as.character(ls[[gterm]])]) + } + ls +} + +fastIC <- function(model, term = "fa(Site, 4):Genotype", ic.num = 2, ...){ + dat <- eval(model$call$data) + str <- strsplit(term, ":")[[1]] + sterm <- sapply(strsplit(gsub("fa\\(|\\))", "", str[grep("fa", str)]), ","), "[", 1) + if(length(grep("vm", str))) + gterm <- sapply(strsplit(gsub("vm\\(|\\))", "", str[grep("vm", str)]), ","), "[", 1) + else + gterm <- str[-grep("fa", str)] + sfa <- fa.asreml(model, ...) + scores <- sfa$blups[[term]]$scores + + scored <- do.call("cbind.data.frame", tapply(scores$blupr, scores[[sterm]], function(el) el)) + names(scored) <- ns <- paste("score", 1:ncol(scored), sep = "") + nk <- dim(scored)[2] + scored <- cbind.data.frame(factor(unique(scores[[gterm]])), scored) + names(scored)[1] <- gterm + loads <- sfa$gammas[[term]]$"rotated loads" + spv <- sfa$gammas[[term]]$"specific var" + lvar <- cbind.data.frame(loads, spv) + sa <- scored[rep(1:nrow(scored), nrow(lvar)),] + lvar <- lvar[rep(1:nrow(lvar), each = nrow(scored)),] + nl <- paste("loads", 1:(ncol(lvar) - 1), sep = "") + names(lvar) <- c(nl, "spec.var") + iclass <- apply(loads[,1:ic.num, drop = FALSE], 1, function(el) paste(ifelse(el > 0, "p", "n"), collapse = "")) + ls <- cbind.data.frame(iclass = factor(rep(iclass, each = nrow(scored))), lvar, sa) + env <- factor(rownames(loads), levels = rownames(loads)) + ls <- cbind.data.frame(rep(env, each = nrow(scored)), ls) + names(ls)[1] <- sterm + for(i in 1:nk){ + ts <- paste("fitted", i, sep = "") + ls[[ts]] <- ls[[ns[i]]]*ls[[nl[[i]]]] + } + ls$CVE <- rowSums(ls[,grep("fitted", names(ls))]) + print(dim(ls)) + ilev <- levels(ls$iclass) + ics <- lapply(split(ls, ls$iclass), function(el, ic.num, gterm){ + mld <- apply(el[,grep("loads", names(el)), drop = FALSE][,1:ic.num], 2, mean) + el$OP <- rowSums(t(mld*t(el[,grep("score", names(el)), drop = FALSE][,1:ic.num]))) + el$dev <- el$CVE - rowSums(el[,grep("fitted", names(el)),drop = FALSE][,1:ic.num]) + el$RMSD <- sqrt(tapply(el$dev^2, el[[gterm]], mean)[as.character(el[[gterm]])]) + el + }, ic.num, gterm) + icd <- do.call("rbind.data.frame", ics) + icd[order(icd[[sterm]], icd[[gterm]]),] +} + + +## ggplot theme for designs + +theme_design <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + legend.position = "none", + panel.grid.minor = element_blank(), + panel.grid.major = element_blank(), + panel.background = element_blank(), + axis.line =element_blank(), + axis.ticks =element_blank(), + axis.text.y = element_text(), + axis.text.x = element_text(angle = 0), + axis.title = element_text(), + strip.text = element_text(size = 14) + ) +} + +## ggplot theme for heat maps + +theme_design_heat <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( +# legend.position = "none", + strip.text.x = element_text(size = 14), #margin = margin(0.15,0,0.15,0, "cm")), + panel.grid.minor = element_blank(), + panel.grid.major = element_blank(), + panel.background = element_blank(), + axis.line = element_blank(), + axis.ticks = element_blank(), + axis.text.x = element_text(angle = 45), + axis.title = element_text() + ) +} + +## ggplot theme for scatter plots + +theme_scatter <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + legend.position = "none", + legend.text = element_text(size = 26), + legend.title = element_text(size = 26), + panel.grid.minor = element_line(colour = "grey80"), + panel.grid.major = element_line(colour = "grey80"), + panel.background = element_blank(), + panel.border = element_rect(fill = NA, size = 1.1, colour = "grey80"), + axis.text.x = element_text(size = 14), + axis.text.y = element_text(size = 14), + axis.title.y = element_text(size = 20, angle = 90), + axis.title.x = element_text(size = 20), +# axis.title = element_blank(), + strip.text = element_text(size = 16) + ) +} + +## ggplot them for bar plots + +theme_barplot <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + legend.position = "bottom", + legend.title = element_blank(), + legend.text = element_text(size = 16), + panel.grid.minor = element_line(colour = "grey80"), + panel.grid.major = element_line(colour = "grey80"), + panel.background = element_blank(), + panel.border = element_rect(fill = NA, size = 1.1, colour = "grey80"), + axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 6), + axis.text.y = element_text(size = 12, hjust = 1), + axis.title = element_blank(), + strip.text = element_text(size = 14) + ) +} + +theme_barplot <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + legend.position = "bottom", + legend.title = element_blank(), + legend.text = element_text(size = 16), + panel.grid.minor = element_blank(), + panel.grid.major = element_blank(), + panel.background = element_blank(), + panel.border = element_rect(fill = NA, size = 1.1, colour = "grey80"), + axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 6), + axis.text.y = element_text(size = 12, hjust = 1), + axis.title = element_blank(), + strip.text = element_text(size = 16) + ) +} + + +waldTest.asreml <- function(object, cc, keep.fac = TRUE) +{ + if(oldClass(object) != "asreml") + stop("Requires an object of class asreml\n") + if(is.null(object$Cfixed)) { + warning("Requires C matrix from model object. Refitting test model with argument \"Cfixed = T\"\n") + asreml.options(Cfixed = TRUE) + object <- update(object) + } + vrb <- as.matrix(object$Cfixed) + tau <- c(object$coefficients$fixed) + names(tau) <- rownames(object$coefficients$fixed) + nc <- length(tau) + sigma2 <- object$sigma2 + vrb <- vrb/sigma2 + ccnams <- names(tau) + zdf <- cdf <- NULL + cc <- lapply(cc, function(el, ccnams){ + if(!all(names(el) %in% c("coef","type","comp","group"))) + stop("Inappropriately named argument for comparison object.") + if(is.numeric(el$coef)) { + if(max(el$coef) > length(ccnams)) + stop("coefficient subscript out of bounds") + names(el$coef) <- ccnams[el$coef] + } + else { + if(any(is.na(pmatch(el$coef, ccnams)))) + stop("Names in contrast do not match the names of coefficients of object") + temp <- pmatch(el$coef, ccnams) + names(temp) <- el$coef + el$coef <- temp + } + el + }, ccnams) + ## split contrasts and other available tests + ctype <- unlist(lapply(cc, function(el) el$type)) + if(!all(ctype %in% c("con","zero"))) + stop("Contrast types must be either \"con\" for treatment comparisons or \"zero\" for testing zero equality") + cons <- cc[ctype %in% "con"] + zero <- cc[ctype %in% "zero"] + cse <- ctau <- zwtest <- cwtest <- zpval <- c() + if(length(cons)) { + CRows <- lapply(cons, function(el, nc){ + if(length(el) < 3){ + con <- contr.helmert(length(el$coef))[, (length(el$coef) - 1)] + names(con) <- cnam <- names(el$coef) + cat("Warning: default contrast being taken for", cnam, "is", con, "\n") + row <- rep(0, nc) + row[el$coef] <- con + row + } + else { + if(is.matrix(el$comp)) { + if(length(el$coef) != ncol(el$comp)) + stop("Length of contrast does not match the number of specified coefficients") + cons <- split(el$comp, 1:nrow(el$comp)) + rows <- lapply(cons, function(ell, first = el$coef, nc){ + row <- rep(0, nc) + row[first] <- ell + row + }, first = el$coef, nc) + rows <- unlist(rows, use.names = F) + matrix(rows, nrow = nrow(el$comp), byrow = T) + } + else { + if(length(el$coef) != length(el$comp)) + stop("Length of contrast does not match the number of specified coefficients") + row <- rep(0, nc) + row[el$coef] <- el$comp + row + } + } + }, nc) + Cmat <- do.call("rbind", CRows) + if(!keep.fac) + ccnams <- substring(ccnams, regexpr("\\_", ccnams) + 1, nchar(ccnams)) + cnam <- lapply(split(Cmat, 1:nrow(Cmat)), function(el, ccnams){ + namr <- ccnams[ifelse(el < 0, T, F)] + naml <- ccnams[ifelse(el > 0, T, F)] + c(paste(naml, collapse = ":"), paste(namr, collapse = ":")) + }, ccnams) + Cnam <- do.call("rbind", cnam) + gnams <- lapply(cons, function(el){ + if(!is.null(el$group)){ + if(!any(names(el$group) %in% c("left","right"))) + stop("group names must be \"left\" and \"right\".") + if(is.null(el$group$left)){ + if(is.matrix(el$comp)) + el$group$left <- rep(NA, nrow(el$comp)) + else el$group$left <- NA + } else { + if(is.matrix(el$comp)){ + if(length(el$group$left) == 1) + el$group$left <- rep(el$group$left, nrow(el$comp)) + if(length(el$group$left) != nrow(el$comp)) + stop("No. of group names do not match the number of comparisons in object") + } + } + if(is.null(el$group$right)){ + if(is.matrix(el$comp)) + el$group$right <- rep(NA, nrow(el$comp)) + else el$group$right <- NA + } else { + if(is.matrix(el$comp)) { + if(length(el$group$right) == 1) + el$group$right <- rep(el$group$right, nrow(el$comp)) + if(length(el$group$right) != nrow(el$comp)) + stop("No. of group names do not match the number of comparisons in object") + } + } + } else { + if(is.matrix(el$comp)) + el$group$left <- el$group$right <- rep(NA, nrow(el$comp)) + else el$group$left <- el$group$right <- NA + } + cbind(el$group$left, el$group$right) + }) + Gnam <- do.call("rbind", gnams) + Cnam[!is.na(Gnam[,1]), 1] <- Gnam[!is.na(Gnam[,1]),1] + Cnam[!is.na(Gnam[,2]), 2] <- Gnam[!is.na(Gnam[,2]),2] + for(i in 1:nrow(Cmat)) { + varmat <- sum(Cmat[i, ]*crossprod(vrb, t(Cmat)[, i])) + cse[i] <- sqrt(varmat * sigma2) + ctau[i] <- sum(Cmat[i, ]*tau) + cwtest[i] <- (ctau[i]/cse[i])^2 + } + cdf <- data.frame(wald = round(cwtest, 6), pval = 1 - pchisq(cwtest, 1), + coef = round(ctau, 6), se = round(cse, 6)) + attr(cdf, "names") <- c("Wald Statistic", "P-Value", "Cont. Coef.", "Std. Error") + attr(cdf, "row.names") <- paste(Cnam[,1], Cnam[,2], sep = " vs ") + oldClass(cdf) <- "data.frame" + } + if(length(zero)) { + ZRows <- lapply(zero, function(el, nc){ + rows <- rep(rep(0, nc), length(el$coef)) + dum <- seq(0, (length(el$coef) - 1) * nc, by = nc) + rows[el$coef + dum] <- 1 + matrix(rows, nrow = length(el$coef), byrow = T) + }, nc) + znam <- unlist(lapply(zero, function(el, ccnams) { + if(is.null(el$group)) + paste(ccnams[el$coef], collapse = ":") + else el$group + }, ccnams)) + if(any(table(znam) > 1)) + stop("Duplicate names in group structures for zero equality tests.") + for(i in 1:length(ZRows)) { + varmat <- ZRows[[i]] %*% crossprod(vrb, t(ZRows[[i]])) + Ctau <- ZRows[[i]] %*% tau + zwtest[i] <- sum(Ctau*crossprod(solve(varmat), Ctau))/sigma2 + zpval[i] <- 1 - pchisq(zwtest[i], nrow(ZRows[[i]])) + } + zdf <- data.frame(wald = round(zwtest, 6), pval = zpval) + attr(zdf, "names") <- c("Wald Statistic", "P-Value") + attr(zdf, "row.names") <- znam + oldClass(zdf) <- "data.frame" + } + res <- list(Contrasts = cdf, Zero = zdf) + invisible(res) +} + +waldTest <- function(object, ...) + UseMethod("waldTest") + From eb11081d5d0a3fe81e09cb0c151a990685e14663 Mon Sep 17 00:00:00 2001 From: Jiazhe Lin Date: Tue, 24 Feb 2026 21:16:28 +1100 Subject: [PATCH 3/8] docs: add documentation for associate(), clones(), compare(), wald.test.asreml() and wald.test() (#5) --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/associate.R | 86 +++++++++++++++++++++--- R/clones.R | 85 ++++++++++++++++++----- R/compare.R | 39 +++++++++-- R/sunrise_sunset_times.R | 2 + R/wald.R | 65 ++++++++++++++++-- man/BiometryTools.Rd | 23 +++++++ man/associate.Rd | 81 +++++++++++++++++++--- man/compare.Rd | 38 +++++++++-- man/extract.Rd | 3 - man/fast.Rd | 3 - man/fineMap.Rd | 3 - man/fixedRegress.Rd | 3 - man/gh_packages.Rd | 29 +++++++- man/herit.asreml.Rd | 3 - man/hsd.Rd | 3 - man/manhattan.Rd | 3 - man/outlier.down.Rd | 3 - man/outlier.rem.Rd | 3 - man/pad.data.Rd | 3 - man/phenClones.Rd | 45 ++++++++++--- man/phenfixClones.Rd | 36 +++++++--- man/randomRegress.Rd | 3 - man/reinstall_packages.Rd | 3 - man/sunrise_time.Rd | 2 + man/themes.Rd | 9 --- man/transfer_packages.Rd | 34 +++++++--- man/wald.test.Rd | 16 +++++ man/wald.test.asreml.Rd | 56 +++++++++++++-- tests/testthat/setup-install_asreml.R | 1 + tests/testthat/teardown-install_asreml.R | 1 + tests/testthat/test-install_asreml.R | 1 + 33 files changed, 553 insertions(+), 135 deletions(-) create mode 100644 man/wald.test.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 34f303b..816296b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,5 +40,5 @@ Enhances: asreml Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.3.3 SystemRequirements: asreml-R 4.x diff --git a/NAMESPACE b/NAMESPACE index 6ccf3ad..c9e86d7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(theme_design_heat) export(theme_scatter) export(transfer_packages) export(update_asreml) +export(wald.test) import(wgaim) importFrom(crayon,blue) importFrom(crayon,green) diff --git a/R/associate.R b/R/associate.R index 9e6ffd9..4f794de 100644 --- a/R/associate.R +++ b/R/associate.R @@ -1,16 +1,86 @@ -#' Associate BLUEs with BLUPs from predict calls of the same model +#' Associate BLUEs and BLUPs from an ASReml Model #' -#' @param model An `asreml` model. -#' @param ran.term Random terms from the `model`. -#' @param fix.term Fixed terms from the `model`. -#' @param ... Other parameters to be passed to the predict function. +#' Extracts BLUPs (random effects) and BLUEs (fixed effects) +#' from an \code{asreml} fitted model using \code{predict()} +#' and merges them into a single data frame. #' -#' @return JULES COMPLETE -#' @export +#' @param model An \code{asreml} model object. +#' The model must contain a valid \code{model$call$data} reference. +#' +#' @param ran.term Character string specifying the random-effect term +#' to classify in \code{predict()}. Examples: +#' \code{"gen"} or \code{"loc:gen"}. +#' +#' @param fix.term Character string specifying the fixed-effect term +#' to classify in \code{predict()}. Examples: +#' \code{"loc"} or \code{"Variety"}. +#' +#' @param ... Additional arguments passed to \code{predict()}. +#' +#' @details +#' The function performs: +#' \enumerate{ +#' \item \code{predict(model, classify = ran.term, only = ran.term)} +#' to extract BLUPs +#' \item \code{predict(model, classify = fix.term)} +#' to extract BLUEs +#' } +#' +#' If \code{ran.term} is an interaction and uses structured +#' variance models (e.g. \code{fa()}, \code{us()}, \code{diag()}, +#' \code{corh()}, \code{corgh()}), the appropriate term is passed +#' to the \code{only} argument. +#' +#' @return +#' A \code{data.frame} containing: +#' \itemize{ +#' \item Classification variables +#' \item \code{blups} +#' \item \code{blups.std.error} +#' \item \code{blues} +#' \item \code{blues.std.error} +#' } #' +#' @seealso \code{\link[asreml]{predict.asreml}} +#' +#' @export #' @examples #' \dontrun{ -#' JULES COMPLETE +#' if (requireNamespace("asreml", quietly = TRUE)) { +#' +#' data(oats) +#' +#' oats$Blocks <- factor(oats$Blocks) +#' oats$Variety <- factor(oats$Variety) +#' oats$Nitrogen <- factor(oats$Nitrogen) +#' +#' # Fit a simple mixed model +#' m <- asreml::asreml( +#' fixed = yield ~ Variety + Nitrogen, +#' random = ~ Blocks, +#' data = oats +#' ) +#' +#' # Extract BLUPs for Blocks and BLUEs for Variety +#' associate( +#' m, +#' ran.term = "Blocks", +#' fix.term = "Variety" +#' ) +#' +#' # Example with interaction BLUPs +#' m2 <- asreml::asreml( +#' fixed = yield ~ Variety, +#' random = ~ Blocks:Variety, +#' data = oats +#' ) +#' +#' associate( +#' m2, +#' ran.term = "Blocks:Variety", +#' fix.term = "Variety" +#' ) +#' } #' } #' associate <- function(model, ran.term = "Treatment:Cultivar", fix.term = "Treatment:Type", ...) { diff --git a/R/clones.R b/R/clones.R index fe4e01c..9c72054 100644 --- a/R/clones.R +++ b/R/clones.R @@ -1,15 +1,45 @@ -#' Check whether genetic clones are in the experiment +#' Test Genetic Clone Similarity in an ASReml Model #' -#' @param model An `asreml` model. -#' @param cross JULES COMPLETE -#' @param matching JULES COMPLETE -#' @param Envir JULES COMPLETE -#' @param no.samp JULES COMPLETE -#' @param sep JULES COMPLETE +#' Evaluates whether genetically related clones show significantly +#' correlated predicted values from an \code{asreml} model. #' -#' @return A data frame containing JULES COMPLETE -#' @export +#' Clone relationships are extracted from a \code{cross} object, +#' and a permutation test is used to assess whether observed +#' correlations exceed random expectation. +#' +#' @param model An \code{asreml} fitted model object. +#' @param cross A list-like object containing a \code{$pheno} data frame. +#' @param matching Character string giving the genotype column name +#' within both the model predictions and \code{cross$pheno}. +#' @param Envir Optional character string specifying an environment +#' factor. If supplied, clone testing is performed separately +#' within each environment. +#' @param no.samp Number of random permutations used to estimate +#' the null distribution (default = 1000). +#' @param sep Character separator used to split clone identifiers +#' (default = "_"). +#' +#' @details +#' For each pair of cloned genotypes: +#' \enumerate{ +#' \item Predicted values are extracted from \code{predict()}. +#' \item The observed correlation between clone pairs is computed. +#' \item A permutation test generates random correlations. +#' \item A p-value is computed from the F-distribution. +#' } +#' +#' The Type1 column reports the proportion of permutation samples +#' with p < 0.05. +#' +#' @return A data frame with columns: +#' \itemize{ +#' \item \code{Type1}: Estimated type I error rate from permutations +#' \item \code{Correlation}: Observed clone correlation +#' \item \code{P-value}: Significance test for observed correlation +#' } #' +#' @export +#' #' @examples #' \dontrun{ #' JULES COMPLETE @@ -58,19 +88,42 @@ phenClones <- function(model, cross, matching = "Genotype", Envir = NULL, no.sam res } -#' Fix clones if they are in the experiment +#' Collapse Cloned Genotypes into Single Levels +#' +#' Modifies a data frame so that cloned genotypes +#' are treated as identical factor levels. #' -#' @param data The data frame to fix -#' @param cross JULES COMPLETE -#' @param matching The column name to match on -#' @param sep The separator between JULES COMPLETE +#' @param data A data frame containing genotype factor levels. +#' @param cross A list-like object containing a \code{$pheno} data frame. +#' @param matching Character string specifying genotype column name. +#' @param sep Character separator used to identify clones (default "_"). +#' +#' @return A modified data frame where cloned genotype +#' levels are collapsed into single combined labels. #' -#' @return The JULES COMPLETE #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' # Fake cross object +#' cross <- list( +#' pheno = data.frame( +#' Genotype = c("A_B", "C", "D_E") +#' ) +#' ) +#' # Fake prediction table +#' fake_model <- list() +#' # Mock predict method +#' predict <- function(model, classify, only = NULL) { +#' data <- data.frame( +#' Genotype = c("A","B","C","D","E"), +#' predicted.value = rnorm(5) +#' ) +#' list(pvals = data) +#' } +#' # Test clone fixing +#' df <- data.frame(Genotype = factor(c("A","B","C","D","E"))) +#' phenfixClones(df, cross) #' } phenfixClones <- function(data, cross, matching = "Genotype", sep = "_") { mg <- as.character(cross$pheno[[matching]]) diff --git a/R/compare.R b/R/compare.R index 98da6bb..196fb43 100644 --- a/R/compare.R +++ b/R/compare.R @@ -1,11 +1,38 @@ -#' BLUEs LSD/p-value comparison function +#' Pairwise comparison of BLUEs using LSD or p-values #' -#' @param model An `asreml` model -#' @param term JULES COMPLETE -#' @param type The type of comparison. Can take values of `PVAL` or `LSD`. -#' @param average.LSD JULES COMPLETE +#' Computes pairwise comparisons among predicted values (typically BLUEs) +#' from an \code{asreml} model for a given classification term. Uses +#' \code{predict(..., sed = TRUE)} to obtain predicted values and the +#' SED (standard error of differences) matrix. Depending on \code{type}, +#' returns either a pairwise LSD matrix or a symmetric matrix of +#' pairwise p-values derived from Wald contrasts. +#' +#' @param model An \code{asreml} fitted model object. +#' @param term Character string giving the classification term passed to +#' \code{predict()}, e.g. \code{"Line"} or an interaction like \code{"Env:Line"}. +#' @param type Character string specifying the comparison type: +#' \code{"LSD"} or \code{"PVAL"}. +#' @param average.LSD Logical (default \code{FALSE}). If \code{TRUE} and +#' \code{type = "LSD"}, returns a single \code{ave.LSD} column computed as the +#' mean of the lower-triangular LSD values instead of a full LSD matrix. +#' +#' @details +#' \strong{LSD:} The least significant difference is computed as +#' \code{LSD = SED * qt(0.025, df = model$nedf, lower.tail = FALSE)}. +#' +#' \strong{PVAL:} All pairwise contrasts are constructed and assessed using +#' a Wald test (via \code{wald.test()}), returning a symmetric p-value matrix. +#' +#' Rows with \code{NA} predicted values are dropped, and the corresponding +#' rows/columns are removed from the SED matrix. +#' +#' @return A \code{data.frame} containing the classification column(s), +#' predicted values, and either: +#' \itemize{ +#' \item an LSD matrix (or \code{ave.LSD} column), or +#' \item a symmetric matrix of pairwise p-values. +#' } #' -#' @return #' @export #' #' @examples diff --git a/R/sunrise_sunset_times.R b/R/sunrise_sunset_times.R index 71e0adf..f45a762 100644 --- a/R/sunrise_sunset_times.R +++ b/R/sunrise_sunset_times.R @@ -48,9 +48,11 @@ #' @export #' #' @examples +#' \dontrun{ #' library(lubridate) #' date <- as_datetime("2020-01-01", tz = "Australia/Adelaide") #' sunrise_time(date, -35.69167, 136.9650) +#' } sunrise_time <- function(datetime, latitude, longitude) { coordinates <- sp::SpatialPoints( matrix(c(longitude, latitude), ncol = 2), diff --git a/R/wald.R b/R/wald.R index 76b9121..32e5c33 100644 --- a/R/wald.R +++ b/R/wald.R @@ -1,15 +1,63 @@ -#' Title +#' Wald tests for linear hypotheses in ASReml models #' -#' @param object An `asreml` object. -#' @param cc JULES COMPLETE -#' @param keep.fac Logical (default `TRUE`). Keep factors? +#' Performs Wald tests on fixed-effect coefficients from an \code{asreml} +#' fitted model using the model's fixed-effect coefficient covariance matrix +#' (the \code{Cfixed} / "C" matrix). Supports (i) linear contrasts among +#' coefficients and (ii) joint tests of whether selected coefficients are zero. #' -#' @return A list containing the wald output. +#' @param object An \code{asreml} fitted model object. +#' @param cc A list of comparison specifications. Each element must be a list +#' containing \code{type} and \code{coef}, and optionally \code{comp} and \code{group}. +#' +#' Valid fields per comparison: +#' \itemize{ +#' \item \code{type}: either \code{"con"} (contrast test) or \code{"zero"} (joint zero test). +#' \item \code{coef}: coefficients involved in the test, either numeric indices +#' into \code{object$coefficients$fixed} or character names matching them. +#' \item \code{comp}: for \code{type = "con"}, either a numeric vector of contrast +#' weights (same length as \code{coef}) or a matrix with one row per contrast and +#' \code{ncol(comp) == length(coef)}. +#' If omitted for \code{"con"}, a default Helmert-type contrast is used. +#' \item \code{group}: optional labels used for printing. For \code{"con"} this may be +#' a list with elements \code{left} and \code{right} (scalars or vectors matching the +#' number of contrasts). For \code{"zero"} this may be a single character label for the test. +#' } +#' +#' @param keep.fac Logical (default \code{TRUE}). If \code{FALSE}, coefficient names are +#' shortened for display by stripping factor prefixes up to the first underscore. +#' +#' @details +#' The function requires access to \code{object$Cfixed}. If it is not present, +#' the model is refitted with \code{Cfixed = TRUE} via \code{asreml.options(Cfixed = TRUE)} +#' followed by \code{update(object)}. +#' +#' For \code{type = "con"}, each contrast is tested with 1 degree of freedom: +#' \deqn{W = (\hat\theta/\mathrm{SE}(\hat\theta))^2 \sim \chi^2_1} +#' where \eqn{\hat\theta = c^\top \hat\beta}. +#' +#' For \code{type = "zero"}, selected coefficients are tested jointly: +#' \deqn{W = (Z\hat\beta)^\top (Z V Z^\top)^{-1} (Z\hat\beta) \sim \chi^2_k} +#' where \eqn{k} is the number of selected coefficients. +#' +#' The function prints tables to the console and returns results invisibly. +#' +#' @return +#' An (invisible) list with components: +#' \itemize{ +#' \item \code{Contrasts}: a data frame of Wald statistics, p-values, contrast estimates and SEs +#' for \code{type = "con"} tests (or \code{NULL} if none requested). +#' \item \code{Zero}: a data frame of Wald statistics and p-values for \code{type = "zero"} tests +#' (or \code{NULL} if none requested). +#' } +#' +#' @method wald.test asreml #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' # TODO example1 (contrast): +#' +#' # TODO example2 (joint zero test): #' } wald.test.asreml <- function(object, cc, keep.fac = TRUE) { if (oldClass(object) != "asreml") { @@ -197,6 +245,11 @@ wald.test.asreml <- function(object, cc, keep.fac = TRUE) { invisible(res) } +#' Wald tests for linear hypotheses +#' +#' @param object An object. Methods are available for \code{asreml}. +#' @param ... Passed to methods. +#' @export wald.test <- function(object, ...) { UseMethod("wald.test") } diff --git a/man/BiometryTools.Rd b/man/BiometryTools.Rd index 003760c..ac440a4 100644 --- a/man/BiometryTools.Rd +++ b/man/BiometryTools.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/BiometryTools.R \docType{package} \name{BiometryTools} +\alias{BiometryTools-package} \alias{BiometryTools} \title{BiometryTools: A package containing some useful functions for linear mixed model analysis} \description{ @@ -12,3 +13,25 @@ The BiometryTools package provides ... The foo functions ... } +\seealso{ +Useful links: +\itemize{ + \item \url{https://biometryhub.github.io/BiometryTools/} + \item Report bugs at \url{https://github.com/biometryhub/BiometryTools/issues} +} + +} +\author{ +\strong{Maintainer}: Sam Rogers \email{biometryhub@adelaide.edu.au} [contributor] + +Authors: +\itemize{ + \item Julian Taylor \email{biometryhub@adelaide.edu.au} +} + +Other contributors: +\itemize{ + \item Russell Edson \email{biometryhub@adelaide.edu.au} [contributor] +} + +} diff --git a/man/associate.Rd b/man/associate.Rd index 5765a6a..96ed6a2 100644 --- a/man/associate.Rd +++ b/man/associate.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/associate.R \name{associate} \alias{associate} -\title{Associate BLUEs with BLUPs from predict calls of the same model} +\title{Associate BLUEs and BLUPs from an ASReml Model} \usage{ associate( model, @@ -12,23 +12,88 @@ associate( ) } \arguments{ -\item{model}{An \code{asreml} model.} +\item{model}{An \code{asreml} model object. +The model must contain a valid \code{model$call$data} reference.} -\item{ran.term}{Random terms from the \code{model}.} +\item{ran.term}{Character string specifying the random-effect term +to classify in \code{predict()}. Examples: +\code{"gen"} or \code{"loc:gen"}.} -\item{fix.term}{Fixed terms from the \code{model}.} +\item{fix.term}{Character string specifying the fixed-effect term +to classify in \code{predict()}. Examples: +\code{"loc"} or \code{"Variety"}.} -\item{...}{Other parameters to be passed to the predict function.} +\item{...}{Additional arguments passed to \code{predict()}.} } \value{ -JULES COMPLETE +A \code{data.frame} containing: +\itemize{ +\item Classification variables +\item \code{blups} +\item \code{blups.std.error} +\item \code{blues} +\item \code{blues.std.error} +} } \description{ -Associate BLUEs with BLUPs from predict calls of the same model +Extracts BLUPs (random effects) and BLUEs (fixed effects) +from an \code{asreml} fitted model using \code{predict()} +and merges them into a single data frame. +} +\details{ +The function performs: +\enumerate{ +\item \code{predict(model, classify = ran.term, only = ran.term)} +to extract BLUPs +\item \code{predict(model, classify = fix.term)} +to extract BLUEs +} + +If \code{ran.term} is an interaction and uses structured +variance models (e.g. \code{fa()}, \code{us()}, \code{diag()}, +\code{corh()}, \code{corgh()}), the appropriate term is passed +to the \code{only} argument. } \examples{ \dontrun{ -JULES COMPLETE +if (requireNamespace("asreml", quietly = TRUE)) { + + data(oats) + + oats$Blocks <- factor(oats$Blocks) + oats$Variety <- factor(oats$Variety) + oats$Nitrogen <- factor(oats$Nitrogen) + + # Fit a simple mixed model + m <- asreml::asreml( + fixed = yield ~ Variety + Nitrogen, + random = ~ Blocks, + data = oats + ) + + # Extract BLUPs for Blocks and BLUEs for Variety + associate( + m, + ran.term = "Blocks", + fix.term = "Variety" + ) + + # Example with interaction BLUPs + m2 <- asreml::asreml( + fixed = yield ~ Variety, + random = ~ Blocks:Variety, + data = oats + ) + + associate( + m2, + ran.term = "Blocks:Variety", + fix.term = "Variety" + ) +} } } +\seealso{ +\code{\link[asreml]{predict.asreml}} +} diff --git a/man/compare.Rd b/man/compare.Rd index f2299b9..04c429d 100644 --- a/man/compare.Rd +++ b/man/compare.Rd @@ -2,24 +2,48 @@ % Please edit documentation in R/compare.R \name{compare} \alias{compare} -\title{BLUEs LSD/p-value comparison function} +\title{Pairwise comparison of BLUEs using LSD or p-values} \usage{ compare(model, term = "Line", type = "PVAL", average.LSD = FALSE) } \arguments{ -\item{model}{An \code{asreml} model} +\item{model}{An \code{asreml} fitted model object.} -\item{term}{JULES COMPLETE} +\item{term}{Character string giving the classification term passed to +\code{predict()}, e.g. \code{"Line"} or an interaction like \code{"Env:Line"}.} -\item{type}{The type of comparison. Can take values of \code{PVAL} or \code{LSD}.} +\item{type}{Character string specifying the comparison type: +\code{"LSD"} or \code{"PVAL"}.} -\item{average.LSD}{JULES COMPLETE} +\item{average.LSD}{Logical (default \code{FALSE}). If \code{TRUE} and +\code{type = "LSD"}, returns a single \code{ave.LSD} column computed as the +mean of the lower-triangular LSD values instead of a full LSD matrix.} } \value{ - +A \code{data.frame} containing the classification column(s), +predicted values, and either: +\itemize{ +\item an LSD matrix (or \code{ave.LSD} column), or +\item a symmetric matrix of pairwise p-values. +} } \description{ -BLUEs LSD/p-value comparison function +Computes pairwise comparisons among predicted values (typically BLUEs) +from an \code{asreml} model for a given classification term. Uses +\code{predict(..., sed = TRUE)} to obtain predicted values and the +SED (standard error of differences) matrix. Depending on \code{type}, +returns either a pairwise LSD matrix or a symmetric matrix of +pairwise p-values derived from Wald contrasts. +} +\details{ +\strong{LSD:} The least significant difference is computed as +\code{LSD = SED * qt(0.025, df = model$nedf, lower.tail = FALSE)}. + +\strong{PVAL:} All pairwise contrasts are constructed and assessed using +a Wald test (via \code{wald.test()}), returning a symmetric p-value matrix. + +Rows with \code{NA} predicted values are dropped, and the corresponding +rows/columns are removed from the SED matrix. } \examples{ \dontrun{ diff --git a/man/extract.Rd b/man/extract.Rd index fa04f48..e98b5a6 100644 --- a/man/extract.Rd +++ b/man/extract.Rd @@ -16,9 +16,6 @@ extract( } \arguments{ \item{fill}{} -} -\value{ - } \description{ Title diff --git a/man/fast.Rd b/man/fast.Rd index eeda4ff..1a076ff 100644 --- a/man/fast.Rd +++ b/man/fast.Rd @@ -8,9 +8,6 @@ fast(model, dat = NULL, term = "fa(Site, 4):Genotype", ...) } \arguments{ \item{...}{} -} -\value{ - } \description{ FAST: overall performance and stability for interpreting Factor Analytic models diff --git a/man/fineMap.Rd b/man/fineMap.Rd index 33ca289..17fca03 100644 --- a/man/fineMap.Rd +++ b/man/fineMap.Rd @@ -15,9 +15,6 @@ fineMap( } \arguments{ \item{...}{} -} -\value{ - } \description{ Fine map a \code{wgaim} object diff --git a/man/fixedRegress.Rd b/man/fixedRegress.Rd index 310f9cc..53ab6af 100644 --- a/man/fixedRegress.Rd +++ b/man/fixedRegress.Rd @@ -13,9 +13,6 @@ fixedRegress( } \arguments{ \item{robust}{} -} -\value{ - } \description{ Fixed regression for doing stuff diff --git a/man/gh_packages.Rd b/man/gh_packages.Rd index b4b287d..77e8a47 100644 --- a/man/gh_packages.Rd +++ b/man/gh_packages.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/gh_packages.R \name{gh_packages} \alias{gh_packages} -\title{Checks if a package was installed from GitHub} +\title{Identify packages installed from remote sources} \usage{ gh_packages(pkg) } @@ -10,9 +10,32 @@ gh_packages(pkg) \item{pkg}{A character vector of package names to check.} } \value{ -A dataframe containing packages installed from remote sources, with the type of remote, account name and repo name. +A data frame containing packages installed from remote sources with columns: +\itemize{ +\item \code{type}: The type of remote source (e.g., "github", "gitlab") +\item \code{account}: The account/username of the remote repository +\item \code{repo}: The repository name (same as package name) +} +Returns an empty data frame if no packages are from remote sources. } \description{ -Checks if a package was installed from GitHub +This function checks which packages in the provided list were installed from +remote sources (GitHub, GitLab, etc.) rather than CRAN, and returns their +remote source information. +} +\details{ +The function examines the DESCRIPTION file of each package to determine if it +was installed from a remote source. Packages installed from CRAN or other +standard repositories will not have RemoteType information. +} +\examples{ +\dontrun{ +# Check if specific packages are from GitHub +gh_packages(c("ggplot2", "devtools", "BiometryTools")) + +# Check all installed packages +gh_packages(rownames(installed.packages())) +} + } \keyword{internal} diff --git a/man/herit.asreml.Rd b/man/herit.asreml.Rd index 3ea361d..88cabb8 100644 --- a/man/herit.asreml.Rd +++ b/man/herit.asreml.Rd @@ -8,9 +8,6 @@ herit.asreml(model, term = "SYear:Genotype", ...) } \arguments{ \item{...}{} -} -\value{ - } \description{ Heritability for multi/single environment trials diff --git a/man/hsd.Rd b/man/hsd.Rd index d6e2251..ccdce4f 100644 --- a/man/hsd.Rd +++ b/man/hsd.Rd @@ -14,9 +14,6 @@ hsd( } \arguments{ \item{...}{} -} -\value{ - } \description{ Title diff --git a/man/manhattan.Rd b/man/manhattan.Rd index 2a8753c..e445016 100644 --- a/man/manhattan.Rd +++ b/man/manhattan.Rd @@ -8,9 +8,6 @@ manhattan(mlist, cross, chr.in = NULL, annotate = TRUE, ...) } \arguments{ \item{...}{} -} -\value{ - } \description{ Manhattan plot using ggplot diff --git a/man/outlier.down.Rd b/man/outlier.down.Rd index c2281d4..09679a5 100644 --- a/man/outlier.down.Rd +++ b/man/outlier.down.Rd @@ -8,9 +8,6 @@ outlier.down(data, model, cutoff = 3) } \arguments{ \item{cutoff}{} -} -\value{ - } \description{ Downweight outliers diff --git a/man/outlier.rem.Rd b/man/outlier.rem.Rd index 322bcb9..2b942cc 100644 --- a/man/outlier.rem.Rd +++ b/man/outlier.rem.Rd @@ -8,9 +8,6 @@ outlier.rem(data, model, cutoff = 3) } \arguments{ \item{cutoff}{} -} -\value{ - } \description{ Outlier removal function diff --git a/man/pad.data.Rd b/man/pad.data.Rd index d3a2c44..0478a92 100644 --- a/man/pad.data.Rd +++ b/man/pad.data.Rd @@ -8,9 +8,6 @@ pad.data(data, pattern = "Row:Column", split = "Block", keep = 4, fill = NULL) } \arguments{ \item{fill}{} -} -\value{ - } \description{ Title diff --git a/man/phenClones.Rd b/man/phenClones.Rd index 03d5d9b..09708c4 100644 --- a/man/phenClones.Rd +++ b/man/phenClones.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/clones.R \name{phenClones} \alias{phenClones} -\title{Check whether genetic clones are in the experiment} +\title{Test Genetic Clone Similarity in an ASReml Model} \usage{ phenClones( model, @@ -14,23 +14,50 @@ phenClones( ) } \arguments{ -\item{model}{An \code{asreml} model.} +\item{model}{An \code{asreml} fitted model object.} -\item{cross}{JULES COMPLETE} +\item{cross}{A list-like object containing a \code{$pheno} data frame.} -\item{matching}{JULES COMPLETE} +\item{matching}{Character string giving the genotype column name +within both the model predictions and \code{cross$pheno}.} -\item{Envir}{JULES COMPLETE} +\item{Envir}{Optional character string specifying an environment +factor. If supplied, clone testing is performed separately +within each environment.} -\item{no.samp}{JULES COMPLETE} +\item{no.samp}{Number of random permutations used to estimate +the null distribution (default = 1000).} -\item{sep}{JULES COMPLETE} +\item{sep}{Character separator used to split clone identifiers +(default = "_").} } \value{ -A data frame containing JULES COMPLETE +A data frame with columns: +\itemize{ +\item \code{Type1}: Estimated type I error rate from permutations +\item \code{Correlation}: Observed clone correlation +\item \code{P-value}: Significance test for observed correlation +} } \description{ -Check whether genetic clones are in the experiment +Evaluates whether genetically related clones show significantly +correlated predicted values from an \code{asreml} model. +} +\details{ +Clone relationships are extracted from a \code{cross} object, +and a permutation test is used to assess whether observed +correlations exceed random expectation. + +For each pair of cloned genotypes: +\enumerate{ +\item Predicted values are extracted from \code{predict()}. +\item The observed correlation between clone pairs is computed. +\item A permutation test generates random correlations. +\item A p-value is computed from the F-distribution. +} + +The Type1 column reports the proportion of permutation samples +with p < 0.05. } \examples{ \dontrun{ diff --git a/man/phenfixClones.Rd b/man/phenfixClones.Rd index 4a3edd2..bdd2781 100644 --- a/man/phenfixClones.Rd +++ b/man/phenfixClones.Rd @@ -2,27 +2,47 @@ % Please edit documentation in R/clones.R \name{phenfixClones} \alias{phenfixClones} -\title{Fix clones if they are in the experiment} +\title{Collapse Cloned Genotypes into Single Levels} \usage{ phenfixClones(data, cross, matching = "Genotype", sep = "_") } \arguments{ -\item{data}{The data frame to fix} +\item{data}{A data frame containing genotype factor levels.} -\item{cross}{JULES COMPLETE} +\item{cross}{A list-like object containing a \code{$pheno} data frame.} -\item{matching}{The column name to match on} +\item{matching}{Character string specifying genotype column name.} -\item{sep}{The separator between JULES COMPLETE} +\item{sep}{Character separator used to identify clones (default "_").} } \value{ -The JULES COMPLETE +A modified data frame where cloned genotype +levels are collapsed into single combined labels. } \description{ -Fix clones if they are in the experiment +Modifies a data frame so that cloned genotypes +are treated as identical factor levels. } \examples{ \dontrun{ -JULES COMPLETE +# Fake cross object +cross <- list( +pheno = data.frame( +Genotype = c("A_B", "C", "D_E") +) +) +# Fake prediction table +fake_model <- list() +# Mock predict method +predict <- function(model, classify, only = NULL) { +data <- data.frame( +Genotype = c("A","B","C","D","E"), +predicted.value = rnorm(5) +) +list(pvals = data) +} +# Test clone fixing +df <- data.frame(Genotype = factor(c("A","B","C","D","E"))) +phenfixClones(df, cross) } } diff --git a/man/randomRegress.Rd b/man/randomRegress.Rd index 2feddf9..4f86960 100644 --- a/man/randomRegress.Rd +++ b/man/randomRegress.Rd @@ -16,9 +16,6 @@ randomRegress(model, Env = "TSite:Variety", levs = NULL, sep = "-", ...) \item{sep}{separator used for Treat x Site names (if multi-x model), if not present assumes single section} \item{...}{Other parameters passed to \code{\link[asreml:predict.asreml]{asreml::predict.asreml()}}.} -} -\value{ - } \description{ The function assumes you have a Treatment x Site factor that is a composite of treatments and sites. The function requires no specific ordering of the factor levels. diff --git a/man/reinstall_packages.Rd b/man/reinstall_packages.Rd index dcee3bb..7e76092 100644 --- a/man/reinstall_packages.Rd +++ b/man/reinstall_packages.Rd @@ -10,9 +10,6 @@ reinstall_packages(location = .libPaths()[1], source = FALSE) \item{location}{Location to check for installed packages and to reinstall to. Defaults to the first option in \code{.libPaths()}.} \item{source}{Logical. Install packages from source that have later source versions than binaries? This should usually be FALSE.} -} -\value{ - } \description{ Reinstall all currently installed packages diff --git a/man/sunrise_time.Rd b/man/sunrise_time.Rd index 0eb5cd2..2cdbcb0 100644 --- a/man/sunrise_time.Rd +++ b/man/sunrise_time.Rd @@ -20,7 +20,9 @@ A POSIXct object for the local time of sunrise. Return the time of sunrise given the date and GPS coordinates. } \examples{ +\dontrun{ library(lubridate) date <- as_datetime("2020-01-01", tz = "Australia/Adelaide") sunrise_time(date, -35.69167, 136.9650) } +} diff --git a/man/themes.Rd b/man/themes.Rd index 6c00f8a..075079b 100644 --- a/man/themes.Rd +++ b/man/themes.Rd @@ -20,15 +20,6 @@ theme_barplot(base_size = 11, base_family = "") \item{base_size}{Plot font size, given in pts.} \item{base_family}{Font family used for the plot.} -} -\value{ - - - - - - - } \description{ Themes that provide some custom styling for \code{ggplot2} plots. diff --git a/man/transfer_packages.Rd b/man/transfer_packages.Rd index 6855b77..aebface 100644 --- a/man/transfer_packages.Rd +++ b/man/transfer_packages.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/transfer_packages.R \name{transfer_packages} \alias{transfer_packages} -\title{Easily reinstall all currently installed packages on another machine or version of R.} +\title{Easily reinstall all currently installed packages on another machine or version of R} \usage{ transfer_packages( library = .libPaths()[1], @@ -16,25 +16,37 @@ transfer_packages( \arguments{ \item{library}{The location of the library on the current machine to copy.} -\item{output}{One of \code{online} (the default), \code{gist} or \code{local}. Saves a list of installed packages to the chosen location, and provides instructions on how to use this to (re)install the packages elsewhere. See details for more information.} +\item{output}{One of \code{online} (the default), \code{gist} or \code{local}. Saves a list of +installed packages to the chosen location, and provides instructions on how to +use this to (re)install the packages elsewhere. See details for more information.} -\item{expiry}{Expiry for online file store in days. Weeks can be given with \code{w}, or months with \code{m}. Default is 7 days. Will be ignored if \code{output} is not \code{online}.} +\item{expiry}{Expiry for online file store in days. Weeks can be given with \code{w}, +or months with \code{m}. Default is 7 days. Will be ignored if \code{output} is not \code{online}.} -\item{filename}{Filename for the local output file. Ignored if \code{output} is not set to \code{local}.} +\item{filename}{Filename for the local output file. Ignored if \code{output} is not +set to \code{local}.} -\item{list_remotes}{Logical (default \code{TRUE}). Check for any packages installed from repositories other than CRAN, and output instructions to reinstall.} +\item{list_remotes}{Logical (default \code{TRUE}). Check for any packages installed +from repositories other than CRAN, and output instructions to reinstall.} \item{quiet}{Logical (default \code{FALSE}). Suppress output if \code{TRUE}.} } \value{ -Prints instructions to console if \code{quiet = FALSE}, and invisibly returns the source command to use on the other machine. +Invisibly returns the command to run on the target machine. If \code{quiet = FALSE}, +also prints instructions to console. } \description{ -Easily reinstall all currently installed packages on another machine or version of R. +Easily reinstall all currently installed packages on another machine or version of R } \details{ -If \code{output} is \code{online}, the resulting list of currently installed packages is stored on \url{https://file.io} for the time specified in \code{expiry}, or until the URL is first accessed. -Note that both visiting the URL and sourcing the URL count as access, and it will be removed after either. -If \code{output} is \code{local}, an R script file (\code{.R}) is saved to the current working directory, which can be transferred manually to another machine. -Beware if using \code{quiet = TRUE} together with \code{output = online}, as the source command will not be +If \code{output} is \code{online}, the resulting list of currently installed packages +is stored on \url{https://file.io} for the time specified in \code{expiry}, +or until the URL is first accessed. Note that either visiting the URL in a browser +or sourcing the URL via R count as access, and it will be removed after either. + +If \code{output} is \code{local}, an R script file (\code{.R}) is saved to the current working +directory, which can be transferred manually to another machine. + +If \code{output} is \code{gist}, the script is uploaded as a GitHub gist (requires the +\code{gistr} package). } diff --git a/man/wald.test.Rd b/man/wald.test.Rd new file mode 100644 index 0000000..2d2ab01 --- /dev/null +++ b/man/wald.test.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wald.R +\name{wald.test} +\alias{wald.test} +\title{Wald tests for linear hypotheses} +\usage{ +wald.test(object, ...) +} +\arguments{ +\item{object}{An object. Methods are available for \code{asreml}.} + +\item{...}{Passed to methods.} +} +\description{ +Wald tests for linear hypotheses +} diff --git a/man/wald.test.asreml.Rd b/man/wald.test.asreml.Rd index eea73d5..f07206b 100644 --- a/man/wald.test.asreml.Rd +++ b/man/wald.test.asreml.Rd @@ -2,25 +2,67 @@ % Please edit documentation in R/wald.R \name{wald.test.asreml} \alias{wald.test.asreml} -\title{Title} +\title{Wald tests for linear hypotheses in ASReml models} \usage{ \method{wald.test}{asreml}(object, cc, keep.fac = TRUE) } \arguments{ -\item{object}{An \code{asreml} object.} +\item{object}{An \code{asreml} fitted model object.} -\item{cc}{JULES COMPLETE} +\item{cc}{A list of comparison specifications. Each element must be a list +containing \code{type} and \code{coef}, and optionally \code{comp} and \code{group}. -\item{keep.fac}{Logical (default \code{TRUE}). Keep factors?} +Valid fields per comparison: +\itemize{ +\item \code{type}: either \code{"con"} (contrast test) or \code{"zero"} (joint zero test). +\item \code{coef}: coefficients involved in the test, either numeric indices +into \code{object$coefficients$fixed} or character names matching them. +\item \code{comp}: for \code{type = "con"}, either a numeric vector of contrast +weights (same length as \code{coef}) or a matrix with one row per contrast and +\code{ncol(comp) == length(coef)}. +If omitted for \code{"con"}, a default Helmert-type contrast is used. +\item \code{group}: optional labels used for printing. For \code{"con"} this may be +a list with elements \code{left} and \code{right} (scalars or vectors matching the +number of contrasts). For \code{"zero"} this may be a single character label for the test. +}} + +\item{keep.fac}{Logical (default \code{TRUE}). If \code{FALSE}, coefficient names are +shortened for display by stripping factor prefixes up to the first underscore.} } \value{ -A list containing the wald output. +An (invisible) list with components: +\itemize{ +\item \code{Contrasts}: a data frame of Wald statistics, p-values, contrast estimates and SEs +for \code{type = "con"} tests (or \code{NULL} if none requested). +\item \code{Zero}: a data frame of Wald statistics and p-values for \code{type = "zero"} tests +(or \code{NULL} if none requested). +} } \description{ -Title +Performs Wald tests on fixed-effect coefficients from an \code{asreml} +fitted model using the model's fixed-effect coefficient covariance matrix +(the \code{Cfixed} / "C" matrix). Supports (i) linear contrasts among +coefficients and (ii) joint tests of whether selected coefficients are zero. +} +\details{ +The function requires access to \code{object$Cfixed}. If it is not present, +the model is refitted with \code{Cfixed = TRUE} via \code{asreml.options(Cfixed = TRUE)} +followed by \code{update(object)}. + +For \code{type = "con"}, each contrast is tested with 1 degree of freedom: +\deqn{W = (\hat\theta/\mathrm{SE}(\hat\theta))^2 \sim \chi^2_1} +where \eqn{\hat\theta = c^\top \hat\beta}. + +For \code{type = "zero"}, selected coefficients are tested jointly: +\deqn{W = (Z\hat\beta)^\top (Z V Z^\top)^{-1} (Z\hat\beta) \sim \chi^2_k} +where \eqn{k} is the number of selected coefficients. + +The function prints tables to the console and returns results invisibly. } \examples{ \dontrun{ -JULES COMPLETE +# TODO example1 (contrast): + +# TODO example2 (joint zero test): } } diff --git a/tests/testthat/setup-install_asreml.R b/tests/testthat/setup-install_asreml.R index 62026a7..fcc36b5 100644 --- a/tests/testthat/setup-install_asreml.R +++ b/tests/testthat/setup-install_asreml.R @@ -1,3 +1,4 @@ +testthat::skip("Skipping set-install_asreml tests during development") expect_file_2 <- function(fn, args, pat, dir = ".", missing = F) { x <- do.call(fn, args) if (!missing) { diff --git a/tests/testthat/teardown-install_asreml.R b/tests/testthat/teardown-install_asreml.R index 62d1103..da22797 100644 --- a/tests/testthat/teardown-install_asreml.R +++ b/tests/testthat/teardown-install_asreml.R @@ -1,3 +1,4 @@ +testthat::skip("Skipping teardown-install_asreml tests during development") # Delete file if it exists if(length(list.files(pattern = "asreml+(([a-zA-Z0-9_.\\-])*)+(.zip|.tar.gz|.tgz)", recursive = T))>0) { file.remove(list.files(pattern = "asreml+(([a-zA-Z0-9_.\\-])*)+(.zip|.tar.gz|.tgz)", recursive = T)) diff --git a/tests/testthat/test-install_asreml.R b/tests/testthat/test-install_asreml.R index d08c936..64a3d9d 100644 --- a/tests/testthat/test-install_asreml.R +++ b/tests/testthat/test-install_asreml.R @@ -1,3 +1,4 @@ +testthat::skip("Skipping test-install_asreml tests during development") skip_if(R.version$status == "Under development (unstable)") test_that("Installation works", { From ee79a9fddac1ee7071906428d01d668c621002de Mon Sep 17 00:00:00 2001 From: Jiazhe Lin Date: Wed, 4 Mar 2026 23:48:22 +1100 Subject: [PATCH 4/8] Included the `ASExtras4` Dependencies. Updated the functions to scripts as per the master files specifies. Updated documentations for `themes`, `pad.data()`, `outliers`, `manhattan()`, `fastIC()` (#5) --- DESCRIPTION | 1 + NAMESPACE | 1 + R/BiometryTools.R | 10 +- R/associate.R | 74 ++-- R/clones.R | 108 +++-- R/conv3.R | 275 ++++++++++--- R/extract.R | 146 ++++--- R/fast.R | 379 ++++++++++++++++-- R/fineMap.R | 223 +++++++---- R/heritability.R | 81 ++-- R/manhattan.R | 148 ++++--- R/outliers.R | 141 ++++--- R/pad.data.R | 119 ++++-- R/regresion_functions.R | 322 +++++++++++---- R/reinstall_packages.R | 1 - R/themes.R | 180 +++++---- R/wald.R | 321 +++++++-------- ...metryTools.Rd => BiometryTools-package.Rd} | 12 +- man/associate.Rd | 9 +- man/compare.Rd | 11 +- man/conv.Rd | 96 ++++- man/extract.Rd | 60 ++- man/fast.Rd | 99 ++++- man/fastIC.Rd | 143 +++++++ man/fineMap.Rd | 57 ++- man/manhattan.Rd | 41 +- man/outlier.down.Rd | 19 - man/outlier.rem.Rd | 19 - man/outliers.Rd | 66 +++ man/pad.data.Rd | 53 ++- man/randomRegress.Rd | 9 +- man/themes.Rd | 51 ++- 32 files changed, 2334 insertions(+), 941 deletions(-) rename man/{BiometryTools.Rd => BiometryTools-package.Rd} (79%) create mode 100644 man/fastIC.Rd delete mode 100644 man/outlier.down.Rd delete mode 100644 man/outlier.rem.Rd create mode 100644 man/outliers.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 816296b..83a5570 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,7 @@ License: MIT + file LICENSE URL: https://biometryhub.github.io/BiometryTools/ BugReports: https://github.com/biometryhub/BiometryTools/issues Imports: + ASExtras4, crayon, ggplot2, glue, diff --git a/NAMESPACE b/NAMESPACE index c9e86d7..13988d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(coord_distance) export(extract) export(extract_time_series) export(fast) +export(fastIC) export(fineMap) export(fixedRegress) export(herit.asreml) diff --git a/R/BiometryTools.R b/R/BiometryTools.R index b0af8c6..a9d9d46 100644 --- a/R/BiometryTools.R +++ b/R/BiometryTools.R @@ -1,10 +1,6 @@ -#' BiometryTools: A package containing some useful functions for linear mixed model analysis +#' BiometryTools: A package containing useful functions for linear mixed model analysis #' #' The BiometryTools package provides ... #' -#' @section Foo functions: -#' The foo functions ... -#' -#' @docType package -#' @name BiometryTools -NULL +#' @keywords internal +"_PACKAGE" \ No newline at end of file diff --git a/R/associate.R b/R/associate.R index 4f794de..54f49b9 100644 --- a/R/associate.R +++ b/R/associate.R @@ -1,4 +1,4 @@ -#' Associate BLUEs and BLUPs from an ASReml Model +#' Associate BLUEs with BLUPs form predict calls of the same model #' #' Extracts BLUPs (random effects) and BLUEs (fixed effects) #' from an \code{asreml} fitted model using \code{predict()} @@ -83,33 +83,49 @@ #' } #' } #' -associate <- function(model, ran.term = "Treatment:Cultivar", fix.term = "Treatment:Type", ...) { - rnams <- all.vars(as.formula(paste("~ ", ran.term, sep = ""))) - fnams <- all.vars(as.formula(paste("~ ", fix.term, sep = ""))) - if (length(ran.term) > 1) { - labs <- attr(terms(as.formula(model$call$random)), "term.labels") - iterm <- labs[grep(paste(rnams[1], "*.*", rnams[2], sep = ""), labs)] - uv <- sapply(strsplit(iterm, "\\("), "[", 1) - if (uv == "fa") { - predr <- predict(model, classify = ran.term, only = iterm, ...) - } else if (uv %in% c("diag", "corh", "corgh", "us")) { - predr <- predict(model, classify = ran.term, only = ran.term, ...) +associate <- function(model, ran.term = "Site:Genotype", fix.term = "Site:Type", ...){ + rnams <- all.vars(as.formula(paste("~ ", ran.term, sep = ""))) + fnams <- all.vars(as.formula(paste("~ ", fix.term, sep = ""))) + if(length(rnams) > 1){ + labs <- attr(terms(as.formula(model$call$random)), "term.labels") + iterm <- labs[grep(paste(rnams[1], "*.*", rnams[2], sep = ""), labs)] + uv <- sapply(strsplit(iterm, "\\("), "[", 1) + if(uv == "fa") + predr <- predict(model, classify = ran.term, only = iterm, ...) + else if(uv %in% c("diag","corh","corgh","us")) + predr <- predict(model, classify = ran.term, only = ran.term, ...) + tab <- table(c(rnams, fnams)) + dnams <- names(tab)[tab == 1] + snams <- names(tab)[tab == 2] + rnams <- rnams[rnams != snams] + } else { + predr <- predict(model, classify = ran.term, only = ran.term, ...) + dnams <- c(rnams,fnams) } - } else { - predr <- predict(model, classify = ran.term, only = ran.term, ...) - } - pr <- predr$pvals - names(pr) <- gsub("predicted.value", "blups", names(pr)) - names(pr) <- gsub("std.error", "blups.std.error", names(pr)) - predf <- predict(model, classify = fix.term, ...) - pf <- predf$pvals - pf <- pf[!is.na(pf$predicted.value), ] - names(pf) <- gsub("predicted.value", "blues", names(pf)) - names(pf) <- gsub("std.error", "blues.std.error", names(pf)) - dat <- eval(model$call$data) - datr <- dat[, unique(c(rnams, fnams))] - datr <- datr[!duplicated(datr[, rnams]), ] - pri <- merge(pr[-ncol(pr)], datr, by = rnams, all.x = TRUE) - pall <- merge(pri, pf[-ncol(pf)], by = fnams, all.x = TRUE) - pall + pr <- predr$pvals + names(pr) <- gsub("predicted.value", "blups", names(pr)) + names(pr) <- gsub("std.error", "blups.std.error", names(pr)) + predf <- predict(model, classify = fix.term, ...) + pf <- predf$pvals + pf <- pf[!is.na(pf$predicted.value),] + names(pf) <- gsub("predicted.value", "blues", names(pf)) + names(pf) <- gsub("std.error", "blues.std.error", names(pf)) + dat <- eval(model$call$data) + datr <- dat[,dnams] + datr <- datr[!duplicated(datr),] + pri <- merge(pr[-ncol(pr)], datr, by = rnams, all.x = TRUE) + pall <- merge(pri, pf[-c(ncol(pf) - 1, ncol(pf))], by = fnams, all.x = TRUE) + if(any(is.na(pall$blues))){ + if(length(rnams) > 1) + spv <- pall[[snams]] + else spv <- factor(rep(1, nrow(pall))) + palls <- split(pall, spv) + pall <- do.call("rbind", lapply(palls, function(el){ + nab <- is.na(el$blues) + el$Type.blue <- as.character(el$Type) + el$Type.blue[nab] <- "Site Average" + el$blues[nab] <- mean(el$blues[!nab]) + el})) + } + pall } diff --git a/R/clones.R b/R/clones.R index 9c72054..790b972 100644 --- a/R/clones.R +++ b/R/clones.R @@ -39,53 +39,49 @@ #' } #' #' @export -#' +#' #' @examples #' \dontrun{ #' JULES COMPLETE #' } -phenClones <- function(model, cross, matching = "Genotype", Envir = NULL, no.samp = 1000, sep = "_") { - mg <- as.character(cross$pheno[[matching]]) - mgs <- lapply(mg, function(el, sep) { - el <- unlist(strsplit(el, sep)) - if (length(el) > 1) { - t(combn(el, 2)) - } else { - NULL - } - }, sep = sep) - mgs <- mgs[!sapply(mgs, is.null)] - mgd <- do.call("rbind", mgs) - if (!is.null(Envir)) { - iterm <- paste(Envir, matching, sep = ":") - pvals <- predict(model, classify = iterm, only = iterm)$pvals - pvlist <- split(pvals, pvals[[Envir]]) - levs <- levels(pvals[[Envir]]) - } else { - pvlist <- list(predict(model, classify = matching, only = matching)$predictions$pvals) - } - corlist <- list() - for (j in 1:length(pvlist)) { - pvt <- pvlist[[j]] - cg1 <- pvt$predicted.value[pmatch(mgd[, 1], pvt[[matching]], duplicates.ok = TRUE)] - cg2 <- pvt$predicted.value[pmatch(mgd[, 2], pvt[[matching]], duplicates.ok = TRUE)] - cor.samp <- c() - for (i in 1:no.samp) { - ts <- sample(pvt$predicted.value, dim(mgd) * 2, replace = FALSE) - cor.samp[i] <- cor(ts[1:(length(ts) / 2)], ts[(length(ts) / 2 + 1):length(ts)]) +phenClones <- function(model, cross, matching = "Genotype", Envir = NULL, no.samp = 1000, sep = "_"){ + mg <- as.character(cross$pheno[[matching]]) + mgs <- lapply(mg, function(el, sep){ + el <- unlist(strsplit(el, sep)) + if(length(el) > 1) + t(combn(el, 2)) + else NULL + }, sep = sep) + mgs <- mgs[!sapply(mgs, is.null)] + mgd <- do.call("rbind", mgs) + if(!is.null(Envir)){ + iterm <- paste(Envir, matching, sep = ":") + pvals <- predict(model, classify = iterm, only = iterm)$pvals + pvlist <- split(pvals, pvals[[Envir]]) + levs <- levels(pvals[[Envir]]) + } else + pvlist <- list(predict(model, classify = matching, only = matching)$predictions$pvals) + corlist <- list() + for(j in 1:length(pvlist)){ + pvt <- pvlist[[j]] + cg1 <- pvt$predicted.value[pmatch(mgd[,1], pvt[[matching]], duplicates.ok = TRUE)] + cg2 <- pvt$predicted.value[pmatch(mgd[,2], pvt[[matching]], duplicates.ok = TRUE)] + cor.samp <- c() + for(i in 1:no.samp) { + ts <- sample(pvt$predicted.value, dim(mgd)*2, replace = FALSE) + cor.samp[i] <- cor(ts[1:(length(ts)/2)], ts[(length(ts)/2 + 1):length(ts)]) + } + df <- dim(mgd)[1] - 2 + cs <- c(cor.samp, cor(cg1, cg2, use = "complete.obs")) + pv <- 1 - pf((cs^2)*df/(1 - cs^2), 1, df) + pva <- pv[1:(length(pv) - 1)] + corlist[[j]] <- c(length(pva[pva < 0.05])/no.samp, cs[length(cs)], pv[length(pv)]) } - df <- dim(mgd)[1] - 2 - cs <- c(cor.samp, cor(cg1, cg2, use = "complete.obs")) - pv <- 1 - pf((cs^2) * df / (1 - cs^2), 1, df) - pva <- pv[1:(length(pv) - 1)] - corlist[[j]] <- c(length(pva[pva < 0.05]) / no.samp, cs[length(cs)], pv[length(pv)]) - } - res <- cbind.data.frame(t(do.call("cbind", corlist))) - names(res) <- c("Type1", "Correlation", "P-value") - if (!is.null(Envir)) { - res <- cbind.data.frame(levs, res) - } - res + res <- cbind.data.frame(t(do.call("cbind", corlist))) + names(res) <- c("Type1","Correlation","P-value") + if(!is.null(Envir)) + res <- cbind.data.frame(levs, res) + res } #' Collapse Cloned Genotypes into Single Levels @@ -125,20 +121,18 @@ phenClones <- function(model, cross, matching = "Genotype", Envir = NULL, no.sam #' df <- data.frame(Genotype = factor(c("A","B","C","D","E"))) #' phenfixClones(df, cross) #' } -phenfixClones <- function(data, cross, matching = "Genotype", sep = "_") { - mg <- as.character(cross$pheno[[matching]]) - mgs <- lapply(mg, function(el, sep) { - el <- unlist(strsplit(el, sep)) - if (length(el) > 1) { - el - } else { - NULL +phenfixClones <- function(data, cross, matching = "Genotype", sep = "_"){ + mg <- as.character(cross$pheno[[matching]]) + mgs <- lapply(mg, function(el, sep){ + el <- unlist(strsplit(el, sep)) + if(length(el) > 1) + el + else NULL + }, sep = sep) + mgs <- mgs[!sapply(mgs, is.null)] + for(i in 1:length(mgs)){ + levs <- levels(data[[matching]]) + levels(data[[matching]])[levs %in% mgs[[i]]] <- paste(mgs[[i]], collapse = sep) } - }, sep = sep) - mgs <- mgs[!sapply(mgs, is.null)] - for (i in 1:length(mgs)) { - levs <- levels(data[[matching]]) - levels(data[[matching]])[levs %in% mgs[[i]]] <- paste(mgs[[i]], collapse = sep) - } - data + data } diff --git a/R/conv3.R b/R/conv3.R index 3b8f4ff..4d32055 100644 --- a/R/conv3.R +++ b/R/conv3.R @@ -1,84 +1,233 @@ -#' Conversion function for Efficiency and Responsiveness BLUPs in Treatment x Site x Variety experiments +#' Convert Treatment × Site BLUPs into Efficiency and Responsiveness #' -#' @param model Final full Treatment x Site x Variety model -#' @param Env Treatment x Site x Variety term -#' @param levs Named treatment levels used in transformation. i.e c("Treat1", "Treat2") would regress Treat2 on Treat1 -#' @param sep separator used for Treat x Site names. Defaults to `-` -#' @param ... Other arguments passed to `predict` +#' Given an \code{asreml} model with a Treatment × Site × Variety structure, +#' this function extracts BLUPs for a Treatment × Site interaction (classified +#' by Variety) and re-parameterizes them into: +#' \itemize{ +#' \item \strong{Efficiency}: the BLUP under a baseline treatment (\code{levs[1]}) +#' \item \strong{Responsiveness}: the residual BLUP under a second treatment +#' (\code{levs[2]}) after regressing \code{levs[2]} on \code{levs[1]} +#' using the random-effect covariance matrix +#' } +#' +#' For each site \eqn{s}, the regression coefficient is: +#' +#' \deqn{ +#' \beta_s = \mathrm{Cov}(T_1, T_2) / \mathrm{Var}(T_1) +#' } +#' +#' and responsiveness BLUPs are computed as: +#' +#' \deqn{ +#' b_{\mathrm{resp}} = b_{T_2} - \beta_s b_{T_1}. +#' } +#' +#' The function also returns the transformed covariance matrix +#' \eqn{G_{\mathrm{trans}} = T G T^\top} corresponding to the +#' efficiency/responsiveness parameterization. +#' +#' \strong{Important:} +#' If the fitted model assumes independent Treatment × Site effects +#' (e.g. \code{random = ~ TSite:Variety}), then the covariance between +#' treatments is zero and \eqn{\beta_s = 0}. In that case, +#' responsiveness reduces to the BLUP under \code{levs[2]}. +#' +#' @param model An \code{asreml} fitted model object. +#' @param Env Character string of the form \code{":"}, +#' e.g. \code{"TSite:Variety"}, used in \code{predict(..., classify = Env)}. +#' @param levs Character vector of length 2 giving the two treatment levels. +#' \code{levs[1]} is treated as the baseline (efficiency). +#' @param sep Separator used in Treatment × Site factor level names. +#' @param ... Additional arguments passed to \code{predict()}. +#' +#' @return A list containing: +#' \itemize{ +#' \item \code{blups}: data frame with columns \code{Site}, \code{Variety}, +#' \code{levs[1]}, \code{levs[2]}, and \code{resp} +#' \item \code{TGmat}: transformed covariance matrix +#' \item \code{Gmat}: original covariance matrix +#' \item \code{beta}: per-site regression coefficient +#' \item \code{sigr}: per-site residual variance +#' \item \code{tmat}: transformation matrix +#' } #' -#' @return Returns a list with BLUPs, and some other stuff JULES COMPLETE #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' library(asreml) +#' library(agridat) +#' +#' data(besag.met) +#' dat <- besag.met +#' +#' dat$county <- factor(dat$county) +#' dat$gen <- factor(dat$gen) +#' dat$rep <- factor(dat$rep) +#' dat$block <- factor(dat$block) +#' +#' # Create artificial 2-level treatment within each site +#' dat$Treat <- ave(seq_len(nrow(dat)), dat$county, FUN = function(i) { +#' rep(c("Treat1","Treat2"), length.out = length(i)) +#' }) +#' dat$Treat <- factor(dat$Treat) +#' +#' dat$TSite <- interaction(dat$Treat, dat$county, sep = "-") +#' dat$Variety <- dat$gen +#' +#' m <- asreml( +#' fixed = yield ~ Treat, +#' random = ~ rep + block + Variety + TSite:Variety, +#' data = dat +#' ) +#' +#' out <- conv( +#' model = m, +#' Env = "TSite:Variety", +#' levs = c("Treat1","Treat2") +#' ) +#' +#' head(out$blups) +#' +#' # In this model, treatments are independent, +#' # so beta will be zero: +#' out$beta #' } #' -conv <- function(model, Env = "TSite:Variety", levs = NULL, sep = "-", ...){ - if(is.null(levs)) - stop("Treatment levels cannnot be NULL.") - evnam <- unlist(strsplit(Env, ":")) - enam <- evnam[1]; vnam <- evnam[2] +conv <- function(model, + Env = "TSite:Variety", + levs = NULL, + sep = "-", + ...) { + + if (is.null(levs) || length(levs) != 2) + stop("levs must be a character vector of length 2.") + + evnam <- strsplit(Env, ":", fixed = TRUE)[[1]] + enam <- evnam[1] + vnam <- evnam[2] + + # Extract BLUPs safely (ASReml 4 compatible) + pred <- stats::predict(model, classify = Env, ...) + pvals <- pred$pvals + + pred_col <- grep("predicted\\.value|predicted|estimate", + names(pvals), value = TRUE) + + if (length(pred_col) != 1) + stop("Could not uniquely identify predicted value column.") + + names(pvals)[names(pvals) == pred_col] <- "blup" + + # Ensure interaction column exists + if (!(enam %in% names(pvals))) { + + # ASReml returns split columns (TSite + Variety) + if (all(evnam %in% names(pvals))) { + pvals[[enam]] <- as.character(pvals[[evnam[1]]]) + } else { + stop("Could not reconstruct interaction column ", enam) + } + } + + tsnams <- unique(as.character(pvals[[enam]])) + + if (!any(grepl(sep, tsnams, fixed = TRUE))) + stop("Separator not found in Treatment by Site levels.") + + st <- strsplit(tsnams, sep, fixed = TRUE) + + if (any(lengths(st) != 2)) + stop("Interaction levels must split into exactly 2 parts.") + + tnam <- vapply(st, `[`, "", 1) + snam <- vapply(st, `[`, "", 2) + + if (!all(levs %in% tnam)) + stop("Treatment levels not found in interaction levels.") + + usnams <- unique(snam) + + # Extract G structure rterm <- attr(terms.formula(model$call$random), "term.labels") rterm <- rterm[grep(paste(evnam, collapse = "|"), rterm)] - if(substring(rterm, 1, 2) == "fa"){ - sumfa <- fa.asreml(model, trunc.char = NULL) - pvals <- sumfa$blups[[rterm]]$blups[,1:3] - Gmat <- sumfa$gammas[[rterm]]$Gmat - } - else { - pred <- predict(model, classify = Env, only = Env, ...) + + # FA Models + if (length(rterm) == 1 && substring(rterm, 1, 2) == "fa") { + + sumfa <- ASExtras4::fa.asreml(model, trunc.char = NULL) + Gmat <- sumfa$gammas[[rterm]]$Gmat + + } else { + + # Standard case: often scalar variance Gmat <- summary(model, vparameters = TRUE)$vparameters[[Env]] - pvals <- pred$pvals - names(pvals)[3] <- "blup" - } - tsnams <- dimnames(Gmat)[[2]] - if(!length(grep(sep, tsnams))) - stop("Separator not found in Treatment by Site factor.") - st <- strsplit(tsnams, split = sep) - tnam <- sapply(st, function(el) el[1]) - snam <- sapply(st, function(el) el[2]) - if(!all(levs %in% c(snam, tnam))) - stop("Treatment levels do not exist in ", enam) - if(all(levs %in% snam)){ - tnam <- snam - snam <- sapply(st, function(el) el[1]) + + # If scalar, convert to diagonal matrix + if (length(Gmat) == 1) { + Gmat <- diag(Gmat, length(tsnams)) + dimnames(Gmat) <- list(tsnams, tsnams) + } } - usnams <- unique(snam) + + # Compute efficiency / Responsiveness tmat <- diag(nrow(Gmat)) - beta <- sigr <- c() - blist <- list() - for(i in 1:length(usnams)){ - inds <- (1:length(snam))[snam %in% usnams[i]] + beta <- sigr <- numeric(length(usnams)) + blist <- vector("list", length(usnams)) + + for (i in seq_along(usnams)) { + + inds <- which(snam == usnams[i]) names(inds) <- tnam[inds] - whl <- (1:2)[levs %in% names(inds)] - if(length(whl) == 2){ - tind <- inds[levs] - mat <- Gmat[tind, tind] - beta[i] <- mat[1,2]/mat[1,1] - rho <- mat[1,2]/sqrt(mat[1,1]*mat[2,2]) - sigr[i] <- (mat[2,2]*(1 - rho^2)) - tmat[tind[2], tind[1]] <- - beta[i] - blow <- pvals$blup[pvals[[enam]] %in% tsnams[tind[1]]] - bhigh <- pvals$blup[pvals[[enam]] %in% tsnams[tind[2]]] - bresp <- bhigh - beta[i]*blow - blist[[i]] <- cbind.data.frame(blow, bhigh, bresp) + + if (!all(levs %in% names(inds))) next + + tind <- inds[levs] + mat <- Gmat[tind, tind, drop = FALSE] + + # if covariance available + if (nrow(mat) == 2 && mat[1,1] != 0) { + beta[i] <- mat[1,2] / mat[1,1] + rho <- mat[1,2] / sqrt(mat[1,1] * mat[2,2]) + sigr[i] <- mat[2,2] * (1 - rho^2) } else { - slevs <- levs[whl] - tind <- inds[slevs] - if(whl == 1) - blist[[i]] <- cbind.data.frame(blow = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bhigh = NA, bresp = NA) - else blist[[i]] <- cbind.data.frame(blow = NA, bhigh = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bresp = NA) + beta[i] <- 0 + sigr[i] <- mat[2,2] } + + tmat[tind[2], tind[1]] <- -beta[i] + + blow <- pvals$blup[pvals[[enam]] == tsnams[tind[1]]] + bhigh <- pvals$blup[pvals[[enam]] == tsnams[tind[2]]] + bresp <- bhigh - beta[i] * blow + + blist[[i]] <- data.frame(blow, bhigh, bresp) } + TGmat <- tmat %*% Gmat %*% t(tmat) - tsnams <- gsub(levs[2], "resp", tsnams) - tsnams <- gsub(levs[1], "eff", tsnams) - dimnames(TGmat) <- list(tsnams, tsnams) - blups <- do.call("rbind.data.frame", blist) - names(blups)[1:3] <- c(levs, "resp") + + # relabel matrix + newnams <- gsub(levs[1], "eff", tsnams) + newnams <- gsub(levs[2], "resp", newnams) + dimnames(TGmat) <- list(newnams, newnams) + + blups <- do.call(rbind, blist) + names(blups) <- c(levs[1], levs[2], "resp") + glev <- unique(as.character(pvals[[vnam]])) - blups <- cbind.data.frame(Site = rep(usnams, each = length(glev)), Variety = rep(glev, length(usnams)), blups) - return(list(blups = blups, TGmat = TGmat, Gmat = Gmat, beta = beta, sigr = sigr, tmat = tmat)) + blups <- data.frame( + Site = rep(usnams, each = length(glev)), + Variety = rep(glev, length(usnams)), + blups + ) + + return(list( + blups = blups, + TGmat = TGmat, + Gmat = Gmat, + beta = beta, + sigr = sigr, + tmat = tmat + )) } diff --git a/R/extract.R b/R/extract.R index f7ec087..0209bcc 100644 --- a/R/extract.R +++ b/R/extract.R @@ -1,65 +1,101 @@ -#' Title +#' Extract a rectangular grid region within groups and optionally pad missing cells #' -#' @param data -#' @param pattern -#' @param match -#' @param split -#' @param pad -#' @param keep -#' @param fill +#' Given a data frame containing two grid coordinate columns (e.g. Row and Column), +#' this function splits the data into groups (e.g. by Block), identifies the +#' bounding rectangle of rows whose \code{Type} matches \code{match}, then returns +#' all rows within that rectangle for each group. Optionally, missing Row-by-Column +#' combinations inside the rectangle are padded by adding new rows. +#' +#' @param data A data frame containing the grouping column specified by \code{split}, +#' the two coordinate columns specified by \code{pattern}, and a column named +#' \code{Type} used for matching. +#' @param pattern A length-1 character string of the form \code{":"} giving +#' the names of the two coordinate columns (default \code{"Row:Column"}). +#' @param match A character vector of \code{Type} values used to define the +#' bounding rectangle within each group (default \code{"DH"}). +#' @param split A length-1 character string giving the column name used to split +#' \code{data} into groups (default \code{"Block"}). +#' @param pad Logical; if \code{TRUE}, pad missing coordinate combinations inside the +#' rectangle by adding new rows (default \code{TRUE}). +#' @param keep Integer index (or indices) of columns to copy from existing rows into +#' padded rows. Values are copied from the first \code{n_missing} rows of the extracted +#' rectangle. Default is \code{4}. +#' @param fill Optional integer index (or indices) of columns to fill with the +#' string \code{"Blank"} in padded rows. If \code{NULL} (default), nothing is filled. +#' +#' @details +#' For each group defined by \code{split}, rows with \code{Type} in \code{match} are used +#' to determine the minimum and maximum values of the two coordinate columns specified +#' by \code{pattern}. All rows in the group whose coordinates fall within these inclusive +#' bounds are returned. +#' +#' When \code{pad = TRUE}, the function constructs a contingency table of the extracted +#' coordinates and adds rows for any missing Row-by-Column combinations. Existing rows are +#' marked with \code{add = "old"} and padded rows with \code{add = "new"}. #' #' @return +#' A data frame containing the extracted (and optionally padded) rows from all groups, +#' sorted by the first and second coordinate columns. If \code{pad = TRUE}, an additional +#' column \code{add} is included to indicate whether a row is original (\code{"old"}) or +#' padded (\code{"new"}). +#' #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' df <- data.frame( +#' Block = rep(1:2, each = 6), +#' Type = rep(c("DH", "X"), times = 6), +#' Row = rep(c("1","1","2"), times = 4), +#' Column= rep(c("1","2"), times = 6), +#' Value = seq_len(12) +#' ) +#' +#' # Extract rectangle defined by Type == "DH" within each Block and pad missing cells +#' out <- extract(df, pattern = "Row:Column", match = "DH", split = "Block", +#' pad = TRUE, keep = 5, fill = 4) +#' head(out) #' } -extract <- function(data, pattern = "Row:Column", match = "DH", split = "Block", pad = TRUE, keep = 4, fill = NULL) { - pat <- unlist(strsplit(pattern, ":")) - if (!(split %in% names(data))) { - stop("split argument not in data") - } - if (!all(pat %in% names(data))) { - stop("One or more of the variables in pattern argument not in data") - } - spd <- split(data, data[[split]]) - spd <- lapply(spd, function(el, match, pat, pad) { - temp <- el[as.character(el$Type) %in% match, ] - print(dim(temp)) - rr <- range(as.numeric(as.character(temp[, pat[1]]))) - rc <- range(as.numeric(as.character(temp[, pat[2]]))) - print(rr) - print(rc) - elr <- (1:nrow(el))[el[[pat[1]]] %in% as.character(rr[1]:rr[2])] - elc <- (1:nrow(el))[el[[pat[2]]] %in% as.character(rc[1]:rc[2])] - ela <- intersect(elr, elc) - temp <- el[ela[order(ela)], ] - temp <- cbind.data.frame(lapply(temp, function(el) { - if (is.factor(el)) factor(el) else el - })) - if (pad) { - temp$add <- "old" - tabs <- table(temp[[pat[1]]], temp[[pat[2]]]) - wh <- which(tabs == 0, arr.ind = TRUE) - if (length(wh)) { - whn <- pmatch(pat, names(temp)) - tp <- temp[1:nrow(wh), ] - tp <- cbind.data.frame(lapply(tp, function(el) rep(NA, length(el)))) - tp[, keep] <- temp[1:nrow(wh), keep] - tp[[pat[1]]] <- factor(rownames(tabs)[wh[, 1]]) - tp[[pat[2]]] <- factor(colnames(tabs)[wh[, 2]]) - if (!is.null(fill)) { - tp[, fill] <- "Blank" +extract <- function(data, pattern = "Row:Column", match = "DH", split = "Block", pad = TRUE, keep = 4, fill = NULL){ + pat <- unlist(strsplit(pattern, ":")) + if(!(split %in% names(data))) + stop("split argument not in data") + if(!all(pat %in% names(data))) + stop("One or more of the variables in pattern argument not in data") + spd <- split(data, data[[split]]) + spd <- lapply(spd, function(el, match, pat, pad){ + temp <- el[as.character(el$Type) %in% match,] + print(dim(temp)) + rr <- range(as.numeric(as.character(temp[,pat[1]]))) + rc <- range(as.numeric(as.character(temp[,pat[2]]))) + print(rr) + print(rc) + elr <- (1:nrow(el))[el[[pat[1]]] %in% as.character(rr[1]:rr[2])] + elc <- (1:nrow(el))[el[[pat[2]]] %in% as.character(rc[1]:rc[2])] + ela <- intersect(elr, elc) + temp <- el[ela[order(ela)],] + temp <- cbind.data.frame(lapply(temp, function(el){ if(is.factor(el)) factor(el) else el})) + if(pad){ + temp$add <- "old" + tabs <- table(temp[[pat[1]]], temp[[pat[2]]]) + wh <- which(tabs == 0, arr.ind = TRUE) + if(length(wh)){ + whn <- pmatch(pat, names(temp)) + tp <- temp[1:nrow(wh),] + tp <- cbind.data.frame(lapply(tp, function(el) rep(NA, length(el)))) + tp[,keep] <- temp[1:nrow(wh),keep] + tp[[pat[1]]] <- factor(rownames(tabs)[wh[,1]]) + tp[[pat[2]]] <- factor(colnames(tabs)[wh[,2]]) + if(!is.null(fill)) + tp[,fill] <- "Blank" + tp$add <- "new" + temp <- rbind.data.frame(temp, tp) + } } - tp$add <- "new" - temp <- rbind.data.frame(temp, tp) - } - } - temp - }, match, pat, pad) - ad <- do.call("rbind.data.frame", spd) - ad[[pat[1]]] <- factor(ad[[pat[1]]], levels = as.character(sort(as.numeric(levels(ad[[pat[1]]]))))) - ad[[pat[2]]] <- factor(ad[[pat[2]]], levels = as.character(sort(as.numeric(levels(ad[[pat[2]]]))))) - ad[order(ad[[pat[1]]], ad[[pat[2]]]), ] + temp + }, match, pat, pad) + ad <- do.call("rbind.data.frame", spd) + ad[[pat[1]]] <- factor(ad[[pat[1]]], levels = as.character(sort(as.numeric(levels(ad[[pat[1]]]))))) + ad[[pat[2]]] <- factor(ad[[pat[2]]], levels = as.character(sort(as.numeric(levels(ad[[pat[2]]]))))) + ad[order(ad[[pat[1]]],ad[[pat[2]]]),] } diff --git a/R/fast.R b/R/fast.R index 9c6bd43..f18123e 100644 --- a/R/fast.R +++ b/R/fast.R @@ -1,45 +1,350 @@ -#' FAST: overall performance and stability for interpreting Factor Analytic models +#' FAST: Overall performance and stability from a factor-analytic MET model #' -#' @param model -#' @param dat -#' @param term -#' @param ... +#' Compute FAST-style summaries (overall performance and stability) from an +#' ASReml factor analytic (FA) mixed model fitted to variety-by-environment +#' (VE) effects. The method is based on the latent regression interpretation of +#' the FA model described by Smith & Cullis (2018). +#' +#' @details +#' Consider the common VE (CVE) effects \eqn{\tilde\beta_{ij}} for genotype +#' \eqn{i} in environment \eqn{j} under an FA model with rotated loadings +#' \eqn{\hat\lambda_{rj}} and genotype scores \eqn{\tilde f_{ri}}. Smith & Cullis (2018) +#' separate the first factor from the remainder via the *first latent regression* +#' representation, where \eqn{\tilde\epsilon_{ij}} collects the contributions from factors +#' \eqn{r = 2,\dots,k} (and can be interpreted as deviations about the first +#' latent regression line). +#' +#' When (almost) all rotated loadings for factor 1 are positive, Smith & Cullis (2018) +#' define **overall performance** (OP) for genotype \eqn{i} as the fitted value at the +#' mean of the factor-1 loadings: +#' \deqn{OP_i = \bar\lambda_1 \tilde f_{1i},} +#' where \eqn{\bar\lambda_1} is the mean of \eqn{\hat\lambda_{1j}} across environments. +#' +#' They define **stability** as the root mean squared deviation (RMSD) about the first +#' latent regression line: +#' \deqn{RMSD_i = \sqrt{\frac{1}{p}\sum_{j=1}^p (\tilde\beta_{ij} - \hat\lambda_{1j}\tilde f_{1i})^2},} +#' where \eqn{p} is the number of environments. +#' +#' This function reconstructs per-environment fitted contributions +#' \eqn{\widehat{\mathrm{fitted}}_{rij} = \hat\lambda_{rj}\tilde f_{ri}} and forms +#' \code{CVE} as their sum across factors. It then computes: +#' \itemize{ +#' \item \code{OP = mean(loads1) * score1} (matches \eqn{OP_i} above), +#' \item \code{dev = CVE - fitted1} (corresponds to \eqn{\tilde\beta_{ij}-\hat\lambda_{1j}\tilde f_{1i}}), +#' \item \code{stab = mean(dev^2)} by genotype (this is *MSD*; RMSD is \code{sqrt(stab)}). +#' } +#' +#' @param model An \code{asreml} model object containing the FA term specified by \code{term}. +#' @param dat A data frame used only to obtain factor levels for the environment and genotype terms +#' named in \code{term}. Must contain those columns as factors (e.g. \code{Site} and \code{Genotype}). +#' @param term Character string giving the FA term of interest, typically of the form +#' \code{"fa(, ):"} (default \code{"fa(Site, 4):Genotype"}). +#' @param ... Additional arguments passed to \code{ASExtras4::fa.asreml()}. +#' +#' @return A data frame with one row per Environment \eqn{\times} Genotype combination containing: +#' \itemize{ +#' \item the environment factor (e.g. \code{Site}), +#' \item \code{loads1}, \code{loads2}, ... and \code{spec.var} (from \code{ASExtras4::fa.asreml()}), +#' \item the genotype factor (e.g. \code{Genotype}), +#' \item \code{score1}, \code{score2}, ... (scores), +#' \item \code{fitted1}, \code{fitted2}, ... (per-factor fitted contributions), +#' \item \code{CVE} (sum of fitted contributions across factors), +#' \item \code{VE} (\code{CVE + spec.var}; included for convenience), +#' \item \code{OP} (overall performance; constant within genotype), +#' \item \code{dev} (deviation from factor-1 fitted contribution), +#' \item \code{stab} (mean squared deviation by genotype; \code{sqrt(stab)} gives RMSD scale). +#' } +#' +#' @references +#' Smith, A. B. & Cullis, B. R. (2018). Plant breeding selection tools built on factor analytic +#' mixed models for multi-environment trial data. *Euphytica*, 214:143. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' +#' library(asreml) +#' data(oats) +#' +#' oats$Nitrogen <- as.factor(oats$Nitrogen) +#' oats$Variety <- as.factor(oats$Variety) +#' oats$Block <- as.factor(oats$Block) +#' +#' m_fa <- asreml( +#' fixed = yield ~ Nitrogen, # Fixed treatment levels +#' random = ~ Block + Block:Variety + fa(Nitrogen, 2):Variety, +#' residual = ~ units, +#' data = oats +#' ) +#' +#' out <- fast(m_fa, dat = oats, term = "fa(Nitrogen, 2):Variety") +#' +#' op_by_var <- tapply(out$OP, out$Variety, unique) +#' msd_by_var <- tapply(out$stab, out$Variety, unique) +#' rmsd_by_var <- sqrt(msd_by_var) +#' +#' summary_tbl <- data.frame( +#' Variety = names(op_by_var), +#' OP = as.numeric(op_by_var), +#' MSD = as.numeric(msd_by_var), +#' RMSD = as.numeric(rmsd_by_var), +#' row.names = NULL +#' ) +#' +#' summary_tbl[order(-summary_tbl$OP), ] +#' } +#' +fast <- function(model, term = "fa(Site, 4):Genotype", ...){ + dat <- eval(model$call$data) + str <- strsplit(term, ":")[[1]] + sterm <- sapply(strsplit(gsub("fa\\(|\\))", "", str[grep("fa", str)]), ","), "[", 1) + gterm <- str[-grep("fa", str)] + sfa <- fa.asreml(model, ...) + scores <- sfa$blups[[term]]$scores + lvar <- cbind.data.frame(sfa$gammas[[term]]$"rotated loads", sfa$gammas[[term]]$"specific var") + scores <- do.call("cbind.data.frame", tapply(scores$blupr, scores[[sterm]], function(el) el)) + names(scores) <- ns <- paste("score", 1:ncol(scores), sep = "") + nk <- dim(scores)[2] + scores <- cbind.data.frame(levels(dat[[gterm]]), scores) + names(scores)[1] <- gterm + sa <- scores[rep(1:nrow(scores), nrow(lvar)),] + lvar <- lvar[rep(1:nrow(lvar), each = nrow(scores)),] + nl <- paste("loads", 1:(ncol(lvar) - 1), sep = "") + names(lvar) <- c(nl, "spec.var") + ls <- cbind.data.frame(rep(levels(dat[[sterm]]), each = nrow(scores)), lvar, sa) + names(ls)[1] <- sterm + for(i in 1:nk){ + ts <- paste("fitted", i, sep = "") + ls[[ts]] <- ls[[ns[i]]]*ls[[nl[[i]]]] + } + print(names(ls)) + ls$CVE <- rowSums(ls[,grep("fitted", names(ls)), drop = FALSE]) + ls$VE <- ls$CVE + ls[,"spec.var"] + ls$OP <- mean(ls$loads1)*ls$score1 + if(nk > 1){ + ls$dev <- ls$CVE - ls$fitted1 + ls$stab <- sqrt(tapply(ls$dev^2, ls[[gterm]], mean)[as.character(ls[[gterm]])]) + } + ls +} + +#' FAST-IC: FAST summaries within interaction classes from a factor-analytic MET model +#' +#' Compute FAST-style summaries of genotype performance and stability within +#' *interaction classes* (ICs) derived from a factor-analytic (FA) mixed model +#' fitted to multi-environment trial (MET) data using ASReml. +#' +#' @details +#' Factor analytic (FA) linear mixed models are widely used to model +#' genotype-by-environment (G×E) interactions in multi-environment trials. +#' In this framework, the common variety-by-environment effects can be written +#' in terms of environment loadings and genotype scores. +#' +#' Smith & Cullis (2018) introduced the FAST approach, which interprets the FA +#' model using a *latent regression representation*. In this interpretation, +#' the first factor captures the dominant pattern of genotype response across +#' environments, allowing summary measures of: +#' +#' \itemize{ +#' \item \strong{Overall performance (OP)} — the expected genotype response at +#' the mean of the first factor loadings. +#' +#' \item \strong{Stability} — typically measured using the root mean squared +#' deviation (RMSD) of genotype responses from the first latent regression line. +#' } +#' +#' Smith et al. (2021) extended this idea by introducing +#' *interaction classes (iClasses)*. These classes group environments according +#' to the **sign pattern of their FA loadings**, reflecting different +#' patterns of genotype response across environments. +#' +#' For a model with \eqn{k} factors, interaction classes are defined using the +#' sign pattern of the first \code{ic.num} loadings: +#' +#' \itemize{ +#' \item \code{"p"} indicates a positive loading +#' \item \code{"n"} indicates a negative loading +#' } +#' +#' For example, when \code{ic.num = 2}: +#' +#' \itemize{ +#' \item \code{"pp"} means both loadings are positive +#' \item \code{"pn"} means loading 1 positive, loading 2 negative +#' \item \code{"np"} means loading 1 negative, loading 2 positive +#' \item \code{"nn"} means both loadings are negative +#' } +#' +#' FAST summaries can then be computed **within each interaction class**. +#' This produces class-specific summaries of genotype performance and stability. +#' +#' In this function: +#' +#' \itemize{ +#' \item Genotype scores \eqn{\tilde f_{ri}} are obtained from the FA model. +#' \item Environment loadings \eqn{\hat\lambda_{rj}} define interaction classes. +#' \item The fitted FA contributions +#' \eqn{\widehat{fitted}_{rij} = \hat\lambda_{rj}\tilde f_{ri}} are reconstructed. +#' \item The common variety-by-environment effect (CVE) is obtained as the +#' sum of fitted contributions across factors. +#' } +#' +#' Within each interaction class \eqn{c}, overall performance is calculated as: +#' +#' \deqn{ +#' OP_{i,c} = +#' \sum_{r=1}^{ic.num} +#' \bar{\lambda}_{r,c} \tilde f_{ri} +#' } +#' +#' where \eqn{\bar{\lambda}_{r,c}} is the mean loading of environments in class +#' \eqn{c}. +#' +#' Stability is summarised using the root mean squared deviation (RMSD): +#' +#' \deqn{ +#' RMSD_{i,c} = +#' \sqrt{ +#' \frac{1}{|c|} +#' \sum_{j \in c} +#' \left( +#' \tilde\beta_{ij} - +#' \sum_{r=1}^{ic.num}\hat\lambda_{rj}\tilde f_{ri} +#' \right)^2 +#' } +#' } +#' +#' where \eqn{\tilde\beta_{ij}} denotes the FA-predicted common VE effect. +#' +#' These summaries allow genotype performance to be compared within groups +#' of environments that share similar G×E response patterns. +#' +#' @param model An \code{asreml} model object containing the FA term specified by \code{term}. +#' +#' @param term Character string specifying the FA term of interest, +#' typically of the form \code{"fa(, ):"}. +#' Default is \code{"fa(Site, 4):Genotype"}. +#' +#' @param ic.num Integer specifying the number of FA factors used to define +#' interaction classes (default = 2). +#' +#' @param ... Additional arguments passed to \code{ASExtras4::fa.asreml()}. #' #' @return +#' A data frame containing one row for each Environment × Genotype +#' combination with columns including: +#' +#' \itemize{ +#' \item environment factor (e.g. \code{Site}) +#' \item \code{iclass} — interaction class label +#' \item FA loadings (\code{loads1}, \code{loads2}, ...) +#' \item \code{spec.var} — specific variance +#' \item genotype scores (\code{score1}, \code{score2}, ...) +#' \item fitted FA contributions (\code{fitted1}, \code{fitted2}, ...) +#' \item \code{CVE} — common variety-by-environment effect +#' \item \code{OP} — overall performance within interaction class +#' \item \code{dev} — deviation from fitted FA contribution +#' \item \code{RMSD} — stability within interaction class +#' } +#' +#' @references +#' Smith, A., Norman, A., Kuchel, H., & Cullis, B. (2021). +#' Plant variety selection using interaction classes derived from factor analytic +#' linear mixed models: Models with independent variety effects. +#' *Frontiers in Plant Science*, 12, 737462. +#' https://doi.org/10.3389/fpls.2021.737462 +#' +#' Smith, A. B., & Cullis, B. R. (2018). +#' Plant breeding selection tools built on factor analytic mixed models for +#' multi-environment trial data. +#' *Euphytica*, 214, 143. +#' #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE -#' } -fast <- function(model, dat = NULL, term = "fa(Site, 4):Genotype", ...) { - # dat <- eval(model$call$data) - str <- strsplit(term, ":")[[1]] - sterm <- sapply(strsplit(gsub("fa\\(|\\))", "", str[grep("fa", str)]), ","), "[", 1) - gterm <- str[-grep("fa", str)] - sfa <- fa.asreml(model, ...) - scores <- sfa$blups[[term]]$scores - lvar <- cbind.data.frame(sfa$gammas[[term]]$"rotated loads", sfa$gammas[[term]]$"specific var") - scores <- do.call("cbind.data.frame", tapply(scores$blupr, scores[[sterm]], function(el) el)) - names(scores) <- ns <- paste("score", 1:ncol(scores), sep = "") - nk <- dim(scores)[2] - scores <- cbind.data.frame(levels(dat[[gterm]]), scores) - names(scores)[1] <- gterm - sa <- scores[rep(1:nrow(scores), nrow(lvar)), ] - lvar <- lvar[rep(1:nrow(lvar), each = nrow(scores)), ] - nl <- paste("loads", 1:(ncol(lvar) - 1), sep = "") - names(lvar) <- c(nl, "spec.var") - ls <- cbind.data.frame(rep(levels(dat[[sterm]]), each = nrow(scores)), lvar, sa) - names(ls)[1] <- sterm - for (i in 1:nk) { - ts <- paste("fitted", i, sep = "") - ls[[ts]] <- ls[[ns[i]]] * ls[[nl[[i]]]] - } - print(names(ls)) - ls$CVE <- rowSums(ls[, grep("fitted", names(ls))]) - ls$VE <- ls$CVE + ls[, "spec.var"] - ls$OP <- mean(ls$loads1) * ls$score1 - ls$dev <- ls$CVE - ls$fitted1 - ls$stab <- tapply(ls$dev^2, ls$Genotype, mean)[as.character(ls$Genotype)] - ls +# library(asreml) +# +# data(oats) +# +# oats$Nitrogen <- as.factor(oats$Nitrogen) +# oats$Variety <- as.factor(oats$Variety) +# oats$Block <- as.factor(oats$Block) +# +# m_fa <- asreml( +# fixed = yield ~ Nitrogen, +# random = ~ Block + Block:Variety + fa(Nitrogen, 2):Variety, +# residual = ~ units, +# data = oats +# ) +# +# out <- fastIC(m_fa, term = "fa(Nitrogen, 2):Variety", ic.num = 2) +# +# # Example: ranking varieties by performance within interaction class +# op_tbl <- aggregate(OP ~ iclass + Variety, out, function(x) unique(x)[1]) +# op_tbl[order(op_tbl$iclass, -op_tbl$OP), ] +# +# # Example: stability comparison +# stab_tbl <- aggregate(RMSD ~ iclass + Variety, out, function(x) unique(x)[1]) +# stab_tbl[order(stab_tbl$iclass, stab_tbl$RMSD), ] +#' } +fastIC <- function(model, term = "fa(Site, 4):Genotype", ic.num = 2, ...){ + dat <- eval(model$call$data) + str <- strsplit(term, ":")[[1]] + sterm <- sapply(strsplit(gsub("fa\\(|\\))", "", str[grep("fa", str)]), ","), "[", 1) + if(length(grep("vm", str))) + gterm <- sapply(strsplit(gsub("vm\\(|\\))", "", str[grep("vm", str)]), ","), "[", 1) + else + gterm <- str[-grep("fa", str)] + sfa <- ASExtras4::fa.asreml(model, ...) + scores <- sfa$blups[[term]]$scores + + scored <- do.call("cbind.data.frame", tapply(scores$blupr, scores[[sterm]], function(el) el)) + names(scored) <- ns <- paste("score", 1:ncol(scored), sep = "") + nk <- dim(scored)[2] + scored <- cbind.data.frame(factor(unique(scores[[gterm]])), scored) + names(scored)[1] <- gterm + loads <- sfa$gammas[[term]]$"rotated loads" + spv <- sfa$gammas[[term]]$"specific var" + lvar <- cbind.data.frame(loads, spv) + sa <- scored[rep(1:nrow(scored), nrow(lvar)),] + lvar <- lvar[rep(1:nrow(lvar), each = nrow(scored)),] + nl <- paste("loads", 1:(ncol(lvar) - 1), sep = "") + names(lvar) <- c(nl, "spec.var") + iclass <- apply(loads[,1:ic.num, drop = FALSE], 1, function(el) paste(ifelse(el > 0, "p", "n"), collapse = "")) + ls <- cbind.data.frame(iclass = factor(rep(iclass, each = nrow(scored))), lvar, sa) + env <- factor(rownames(loads), levels = rownames(loads)) + ls <- cbind.data.frame(rep(env, each = nrow(scored)), ls) + names(ls)[1] <- sterm + for(i in 1:nk){ + ts <- paste("fitted", i, sep = "") + ls[[ts]] <- ls[[ns[i]]]*ls[[nl[[i]]]] + } + ls$CVE <- rowSums(ls[,grep("fitted", names(ls))]) + print(dim(ls)) + ilev <- levels(ls$iclass) + ics <- lapply(split(ls, ls$iclass), function(el, ic.num, gterm){ + mld <- apply(el[,grep("loads", names(el)), drop = FALSE][,1:ic.num], 2, mean) + el$OP <- rowSums(t(mld*t(el[,grep("score", names(el)), drop = FALSE][,1:ic.num]))) + el$dev <- el$CVE - rowSums(el[,grep("fitted", names(el)),drop = FALSE][,1:ic.num]) + el$RMSD <- sqrt(tapply(el$dev^2, el[[gterm]], mean)[as.character(el[[gterm]])]) + el + }, ic.num, gterm) + icd <- do.call("rbind.data.frame", ics) + icd[order(icd[[sterm]], icd[[gterm]]),] } + + + + + + + + + + + + + + diff --git a/R/fineMap.R b/R/fineMap.R index 27e3b23..b42f847 100644 --- a/R/fineMap.R +++ b/R/fineMap.R @@ -1,98 +1,149 @@ -#' Fine map a `wgaim` object +#' Fine-map a QTL region around a nominated marker in a fitted `wgaim` model #' -#' @param model -#' @param intervalObj -#' @param mark -#' @param flanking -#' @param exclusion.window -#' @param ... +#' @description +#' Given a fitted \code{wgaim} object and a \code{qtl::cross} object containing +#' interval- or imputation-based genotype data, \code{fineMap()} scans a window +#' of markers around a nominated marker and refits the model for each candidate +#' marker. For each refit, the function extracts the candidate marker effect +#' (either from the fixed or random component, depending on the model method), +#' computes a Wald-style test statistic, and reports a per-marker p-value and +#' Logarithm of the Odds(LOD). +#' +#' @details +#' The function: +#' \enumerate{ +#' \item Extracts genotype predictors from \code{intervalObj} (either +#' \code{interval.data} or \code{imputed.data}, depending on \code{model$QTL$type}). +#' \item Aligns genotype rows to the phenotype rows used by \code{model}, using +#' the ID column \code{model$QTL$diag$genetic.term}. +#' \item Defines a window of \code{flanking} markers on each side of \code{mark}. +#' \item For each candidate marker in the window, removes nearby markers within +#' \code{exclusion.window} (in the same units as the map positions returned by +#' \code{qtl::pull.map()}) from the background set used to build the covariance. +#' \item Constructs a covariance object via \code{wgaim:::constructCM()} and +#' assigns \code{covObj} into the parent frame (a side-effect required by +#' downstream \code{wgaim}/\code{asreml} update code). +#' \item Refits the model with the candidate marker added and the original QTL +#' marker removed, then extracts an effect estimate and its variance to compute +#' a test statistic, p-value, and LOD. +#' } +#' +#' @param model A fitted \code{wgaim} object (typically fit with ASReml-R via \code{asreml}). +#' The object must contain \code{model$QTL} components used by \code{wgaim}. +#' +#' @param intervalObj A \code{qtl::cross} object containing genotype data used for +#' interval mapping or genotype imputation. Must inherit class \code{"cross"}. +#' +#' @param mark Character scalar. The name of the focal marker around which to fine-map. +#' This must be a marker present in \code{intervalObj}. +#' +#' @param flanking Integer. Number of markers to include on each side of \code{mark} +#' when defining the scan window. Default is \code{50}. +#' +#' @param exclusion.window Numeric. Distance threshold for excluding nearby markers +#' when constructing the background marker set for each candidate marker. Markers +#' with \code{abs(dist_i - dist_j) < exclusion.window} are excluded. Default is \code{10000}. +#' +#' @param ... Additional arguments passed to \code{update.asreml()} when refitting +#' the model for each candidate marker. +#' +#' @return A data frame with one row per scanned marker in the flanking window and columns: +#' \describe{ +#' \item{mark}{Marker name (as in \code{qtl::pull.map(intervalObj, chr)}).} +#' \item{dist}{Map position of the marker (units as returned by \code{qtl::pull.map()}).} +#' \item{pvalue}{Per-marker p-value derived from a Wald-style statistic.} +#' \item{LOD}{Per-marker LOD score derived from the same statistic.} +#' } +#' +#' @section Note: +#' \code{fineMap()} assigns an object named \code{covObj} into the calling +#' environment (\code{parent.frame()}). This is required by internal \code{wgaim} +#' update routines that expect \code{covObj} to exist during model refits. #' #' @import wgaim #' -#' @return #' @export #' #' @examples #' \dontrun{ #' JULES COMPLETE #' } -fineMap <- function(model, intervalObj, mark = NULL, flanking = 50, exclusion.window = 10000, ...) { - resp <- deparse(model$call$fixed[[2]]) - phenoData <- eval(parse(text = paste(resp, ".data", sep = ""))) - if (missing(intervalObj)) { - stop("intervalObj is a required argument") - } - if (!inherits(intervalObj, "cross")) { - stop("intervalObj is not of class \"cross\"") - } - if (is.null(mark)) { - stop("mark argument must be non-NULL.") - } - if (model$QTL$type == "interval") { - gdat <- lapply(intervalObj$geno, function(el) el$interval.data) - } else { - gdat <- lapply(intervalObj$geno, function(el) el$imputed.data) - } - genoData <- do.call("cbind", gdat) - gterm <- model$QTL$diag$genetic.term - state <- model$QTL$diag$state - method <- model$QTL$method - dimnames(genoData) <- list(as.character(intervalObj$pheno[[gterm]]), names(state)) - genoData <- genoData[rownames(genoData) %in% as.character(phenoData[[gterm]]), ] - fm <- find.markerpos(intervalObj, mark) - chrs <- sapply(strsplit(names(state), "\\."), "[", 2) - chr.ind <- chrs %in% fm$chr - state.chr <- state[chrs %in% fm$chr] - mapc <- pull.map(intervalObj, fm$chr)[[1]] - qind <- (1:length(mapc))[names(mapc) %in% mark] - mark.qtl <- gsub("Chr\\.", "X.", names(state.chr)[qind]) - ql <- ifelse(qind - flanking <= 0, 1, qind - flanking) - qr <- ifelse(qind + flanking > length(mapc), length(mapc), qind + flanking) - state.chr[ql:qr] <- 1 - genoChr <- genoData[, names(state.chr)[ql:qr]] - colnames(genoChr) <- gsub("Chr\\.", "X.", colnames(genoChr)) - tmp <- cbind.data.frame(rownames(genoData), genoChr) - colnames(tmp)[1] <- gterm - phenoData <- phenoData[, !(names(phenoData) %in% mark.qtl)] - phenoData$ord <- 1:nrow(phenoData) - phenoData <- merge(phenoData, tmp, by = gterm, all.x = TRUE) - phenoData <- phenoData[order(phenoData$ord), ] - k <- 1 - pvalue <- lod <- c() - for (i in ql:qr) { - wind <- abs(mapc[i] - mapc) < exclusion.window - state.chr[wind] <- 0 - state[chr.ind] <- state.chr - print(length(state)) - genoSub <- genoData[, as.logical(state)] - print(dim(genoData)) - cov.env <- wgaim:::constructCM(genoSub) - covObj <- cov.env$relm - assign("covObj", covObj, envir = parent.frame()) - mark.i <- colnames(genoChr)[k] - print(mark.i) - if (method == "random") { - temp.form <- update.formula(model$call$random, as.formula(paste("~ . - ", mark.qtl, sep = ""))) - temp.form <- update.formula(temp.form, as.formula(paste("~ . + ", mark.i, sep = ""))) - tempmodel <- wgaim:::vModify(model, gterm) - tempmodel <- update.asreml(tempmodel, random. = temp.form, data = phenoData, ...) - } - else { - fix.form <- formula(paste(". ~ . +", mark.i, "-", mark.qtl, sep = "")) - tempmodel <- update.asreml(model, fixed. = fix.form, data = phenoData, ...) +fineMap <- function(model, intervalObj, mark = NULL, flanking = 50, exclusion.window = 10000, ...){ + resp <- deparse(model$call$fixed[[2]]) + phenoData <- eval(parse(text = paste(resp, ".data", sep = ""))) + if (missing(intervalObj)) + stop("intervalObj is a required argument") + if (!inherits(intervalObj, "cross")) + stop("intervalObj is not of class \"cross\"") + if(is.null(mark)) + stop("mark argument must be non-NULL.") + if (model$QTL$type == "interval") + gdat <- lapply(intervalObj$geno, function(el) el$interval.data) + else gdat <- lapply(intervalObj$geno, function(el) el$imputed.data) + genoData <- do.call("cbind", gdat) + gterm <- model$QTL$diag$genetic.term + state <- model$QTL$diag$state + method <- model$QTL$method + dimnames(genoData) <- list(as.character(intervalObj$pheno[[gterm]]), names(state)) + genoData <- genoData[rownames(genoData) %in% as.character(phenoData[[gterm]]),] + fm <- find.markerpos(intervalObj, mark) + chrs <- sapply(strsplit(names(state), "\\."), "[", 2) + chr.ind <- chrs %in% fm$chr + state.chri <- state[chrs %in% fm$chr] + mapc <- pull.map(intervalObj, fm$chr)[[1]] + qind <- (1:length(mapc))[names(mapc) %in% mark] + mark.qtl <- gsub("Chr\\.", "X.", names(state.chri)[qind]) + ql <- ifelse(qind - flanking <= 0, 1, qind - flanking) + qr <- ifelse(qind + flanking > length(mapc), length(mapc), qind + flanking) + state.chri[ql:qr] <- 1 + genoChr <- genoData[,names(state.chri)[ql:qr]] + colnames(genoChr) <- gsub("Chr\\.", "X.", colnames(genoChr)) + tmp <- cbind.data.frame(rownames(genoData), genoChr) + colnames(tmp)[1] <- gterm + phenoData <- phenoData[,!(names(phenoData) %in% mark.qtl)] + phenoData$ord <- 1:nrow(phenoData) + phenoData <- merge(phenoData, tmp, by = gterm, all.x = TRUE) + phenoData <- phenoData[order(phenoData$ord),] + k <- 1 + pvalue <- lod <- c() + for(i in ql:qr){ + wind <- abs(mapc[i] - mapc) <= exclusion.window + state.chr <- state.chri + state.chr[wind] <- 0 + state[chr.ind] <- state.chr + mout <- (1:ncol(genoData))[!as.logical(state)] + genoSub <- genoData[,-mout] + if(ncol(genoSub) > nrow(genoSub)){ + cov.env <- wgaim:::constructCM(genoSub) + covObj <- cov.env$relm + } else { + tempObj <- cbind.data.frame(covObj[,1], genoSub) + names(tempObj)[1] <- names(covObj)[1] + covObj <- tempObj + } + assign("covObj", covObj, envir = parent.frame()) + mark.i <- colnames(genoChr)[k] + print(mark.i) + if(method == "random"){ + temp.form <- update.formula(model$call$random, as.formula(paste("~ . - ", mark.qtl, sep = ""))) + temp.form <- update.formula(temp.form, as.formula(paste("~ . + ", mark.i, sep = ""))) + tempmodel <- wgaim:::vModify(model, gterm) + tempmodel <- update.asreml(tempmodel, random. = temp.form, data = phenoData, ...) + } + else { + fix.form <- formula(paste(". ~ . +", mark.i, "-", mark.qtl, sep = "")) + tempmodel <- update.asreml(model, fixed. = fix.form, data = phenoData, ...) + } + cf <- tempmodel$coefficients[[method]] + whr <- grep(mark.i, rownames(cf)) + mcf <- tempmodel$coefficients[[method]][whr, 1] + vcf <- tempmodel$vcoeff[[method]][whr] + zrat <- mcf/sqrt(vcf * tempmodel$sigma2) + #zrat <- mcf/sqrt(vcf) + pvalue[k] <- round((1 - pchisq(zrat^2, df = 1)), 4) + lod[k] <- round(0.5 * log(exp(zrat^2), base = 10), 4) + print(c(pvalue[k], lod[k])) + k <- k + 1 } - cf <- tempmodel$coefficients[[method]] - whr <- grep(mark.i, rownames(cf)) - print(whr) - mcf <- tempmodel$coefficients[[method]][whr, 1] - vcf <- tempmodel$vcoeff[[method]][whr] - print(c(mcf, vcf)) - # zrat <- mcf/sqrt(vcf * tempmodel$sigma2) - zrat <- mcf / sqrt(vcf) - pvalue[k] <- round((1 - pchisq(zrat^2, df = 1)) / 2, 4) - lod[k] <- round(0.5 * log(exp(zrat^2), base = 10), 4) - print(c(pvalue[k], lod[k])) - k <- k + 1 - } - cbind.data.frame(mark = names(mapc)[ql:qr], dist = mapc[ql:qr], pvalue = pvalue, LOD = lod) + cbind.data.frame(mark = names(mapc)[ql:qr], dist = mapc[ql:qr], pvalue = pvalue, LOD = lod) } diff --git a/R/heritability.R b/R/heritability.R index 22a9d96..a32c77d 100644 --- a/R/heritability.R +++ b/R/heritability.R @@ -11,49 +11,44 @@ #' \dontrun{ #' JULES COMPLETE #' } -herit.asreml <- function(model, term = "SYear:Genotype", ...) { - dat <- eval(model$call$data) - if (length(grep(":", term))) { - terms <- all.vars(as.formula(paste("~ ", term, sep = ""))) - labs <- attr(terms(as.formula(model$call$random)), "term.labels") - iterm <- labs[grep(paste(terms[1], "*.*", terms[2], sep = ""), labs)] - uv <- sapply(strsplit(iterm, "\\("), "[", 1) - if (uv == "fa") { - pred <- predict(model, classify = term, only = iterm, sed = TRUE, ...) - sumfa <- fa.asreml(model, trunc.char = NULL) - gam <- diag(sumfa$gammas[[grep(paste(terms[1], "*.*", terms[2], sep = ""), names(sumfa$gammas))]]$Gmat) - } else if (uv %in% c("diag", "corh", "corgh", "us")) { - pred <- predict(model, classify = term, only = term, sed = TRUE, ...) - if (uv %in% c("diag")) { - gam <- summary(model, vparameters = TRUE)$vparameters[[term]] - } else { - gam <- diag(summary(model, vparameters = TRUE)$vparameters[[term]]) - } - } - else { - stop("The function does not understand this asreml function.") - } - site <- pred$pvals[[terms[1]]] - levs <- levels(site) - avsed <- c() - for (i in 1:length(levs)) { - inds <- (1:length(site))[as.character(site) %in% levs[i]] - sedm <- pred$sed[inds, inds] - sedm <- sedm[upper.tri(sedm)] - avsed[i] <- mean(sedm) - } - } else { - pred <- predict(model, classify = term, only = term, sed = TRUE, ...) - con <- model$vparameters.con[grep("units\\!R", names(model$vparameters.con))] - if (length(con) == 0 || (con != 4)) { - gam <- model$vparameters[grep(term, names(model$vparameters))] * model$sigma2 +herit.asreml <- function(model, term = "SYear:Genotype", ...){ + dat <- eval(model$call$data) + if(length(grep(":", term))){ + terms <- all.vars(as.formula(paste("~ ", term, sep = ""))) + labs <- attr(terms(as.formula(model$call$random)), "term.labels") + iterm <- labs[grep(paste(terms[1], "*.*", terms[2], sep = ""), labs)] + uv <- sapply(strsplit(iterm, "\\("), "[", 1) + if(uv == "fa"){ + pred <- predict(model, classify = term, only = iterm, sed = TRUE, ...) + sumfa <- ASExtras4::fa.asreml(model, trunc.char = NULL) + gam <- diag(sumfa$gammas[[grep(paste(terms[1], "*.*", terms[2], sep = ""), names(sumfa$gammas))]]$Gmat) + } else if(uv %in% c("diag","corh","corgh","us")){ + pred <- predict(model, classify = term, only = term, sed = TRUE, ...) + if(uv %in% c("diag")) + gam <- summary(model, vparameters = TRUE)$vparameters[[term]] + else + gam <- diag(summary(model, vparameters = TRUE)$vparameters[[term]]) + } + else stop("The function does not understand this asreml function.") + site <- pred$pvals[[terms[1]]] + levs <- levels(site) + avsed <- c() + for(i in 1:length(levs)){ + inds <- (1:length(site))[as.character(site) %in% levs[i]] + sedm <- pred$sed[inds, inds] + sedm <- sedm[upper.tri(sedm)] + avsed[i] <- mean(sedm) + } } else { - gam <- model$vparameters[grep(term, names(model$vparameters))] + pred <- predict(model, classify = term, only = term, sed = TRUE, ...) + con <- model$vparameters.con[grep("units\\!R", names(model$vparameters.con))] + if(length(con) == 0 || (con != 4)) + gam <- model$vparameters[grep(term, names(model$vparameters))]*model$sigma2 + else gam <- model$vparameters[grep(term, names(model$vparameters))] + avsed <- pred$avsed[2] + levs <- term } - avsed <- pred$avsed[2] - levs <- term - } - h2 <- 1 - (avsed^2) / (2 * gam) - names(h2) <- levs - h2 + h2 <- 1 - (avsed^2)/(2*gam) + names(h2) <- levs + h2 } diff --git a/R/manhattan.R b/R/manhattan.R index c7c5b4b..d793909 100644 --- a/R/manhattan.R +++ b/R/manhattan.R @@ -1,73 +1,97 @@ -#' Manhattan plot using ggplot +#' Manhattan plot for QTL diagnostics #' -#' @param mlist -#' @param cross -#' @param chr.in -#' @param annotate -#' @param ... +#' Creates a Manhattan-style plot (via `ggplot2`) of the outlier/diagnostic +#' statistic across genome position for one or more analyses. +#' +#' The function expects `mlist` to be a named list of results where each element +#' contains QTL diagnostic output in `x$QTL$diag$oint[[1]]`. Marker positions are +#' taken from the supplied `cross` object (typically an `rqtl` cross). +#' +#' @param mlist Named list of QTL analysis results. Each element must contain: +#' \itemize{ +#' \item `QTL$diag$oint[[1]]`: a named numeric vector of statistics per marker +#' (names like `"Chr.1.abc"` are allowed), +#' \item `QTL$qtl`: a character vector of selected QTL marker names (used for +#' annotation when `annotate = TRUE`). +#' } +#' @param cross A `qtl::cross` object containing marker maps in `cross$geno`. +#' Used to compute cumulative genome positions and chromosome labels. +#' @param chr.in Optional character vector of chromosome IDs to include +#' (e.g. `c("1","2","X")`). Default `NULL` (use all chromosomes). +#' @param annotate Logical. If `TRUE` (default), label selected QTL markers +#' (`x$QTL$qtl`) on the plot. +#' @param ... Currently unused. +#' +#' @return A `ggplot2` object (a faceted Manhattan plot). If `annotate = TRUE`, +#' the returned plot includes text labels for selected markers. +#' +#' @details +#' The statistic is plotted against a cumulative genome position computed from +#' the marker maps in `cross$geno`. Chromosomes are shown using alternating point +#' colours (two groups). The plot is faceted by the names of `mlist` (one panel +#' per analysis). +#' +#' If `chr.in` is supplied, both the cross object and the plotted markers are +#' filtered to those chromosomes. #' -#' @return #' @export #' #' @examples #' \dontrun{ #' JULES COMPLETE #' } -manhattan <- function(mlist, cross, chr.in = NULL, annotate = TRUE, ...) { - nams <- names(mlist) - outs <- lapply(mlist, function(el) { - temp <- el$QTL$diag$oint[[1]] - names(temp) <- gsub("Chr\\.", "", names(temp)) - temp - }) - if (!is.null(chr.in)) { - cross <- subset(cross, chr = chr.in) - for (i in 1:length(outs)) { - onam <- sapply(strsplit(names(outs[[i]]), "\\."), "[", 1) - outs[[i]] <- outs[[i]][onam %in% chr.in] +manhattan <- function(mlist, cross, chr.in = NULL, annotate = TRUE, ...){ + nams <- names(mlist) + outs <- lapply(mlist, function(el){ + temp <- el$QTL$diag$oint[[1]] + names(temp) <- gsub("Chr\\.","", names(temp)) + temp + }) + if(!is.null(chr.in)){ + cross <- subset(cross, chr = chr.in) + for(i in 1:length(outs)){ + onam <- sapply(strsplit(names(outs[[i]]), "\\."), "[", 1) + outs[[i]] <- outs[[i]][onam %in% chr.in] + } } - } - dat <- cbind.data.frame(value = unlist(outs)) - dat$nout <- sapply(outs, names) - len <- sapply(outs, length)[1] - dat$Name <- rep(nams, each = len) - chr <- rep(names(cross$geno), times = nmar(cross)) - dat$chr <- factor(rep(chr, length(nams))) - dist <- lapply(cross$geno, function(el) { - tel <- c(100000, diff(el$map)) - names(tel)[1] <- names(el$map)[1] - tel - }) - dist <- cumsum(unlist(dist)) - sp <- unlist(lapply(split(dist, chr), function(el) min(el) + diff(range(el)) / 2)) - spc <- unlist(lapply(split(dist, chr), function(el) max(el) + 500000)) - dat$dist <- rep(dist, length(nams)) - dat$chr.g <- dat$chr - levels(dat$chr.g) <- c(rep(c("g1", "g2"), 10), "g1") - cols <- brewer.pal(3, "Set1")[1:2] - gp <- ggplot(dat, aes(x = dist, y = value)) + - facet_wrap(~Name, ncol = 1, scales = "free_y") + - geom_vline(xintercept = spc, colour = "gray80") + - geom_point(aes(colour = chr.g)) + - scale_y_continuous(expand = c(0.02, 0), breaks = seq(0, 100, by = 10)) + - scale_x_continuous(breaks = sp, labels = names(cross$geno), expand = c(0.02, 0)) + - xlab("") + - ylab("Outlier Statistic") + - scale_color_manual(values = cols) + - theme(legend.position = "none", axis.text = element_text(size = 10), panel.background = element_blank(), panel.border = element_rect(colour = "gray80", fill = NA, size = 1.1), panel.grid.major.y = element_line(colour = "gray90", size = 1.2), panel.grid.minor.y = element_line(colour = "gray90", size = 0.8), panel.grid.major.x = element_blank(), axis.title = element_text(size = 20), strip.text = element_text(size = 10)) - if (annotate) { - qtl <- lapply(mlist, function(el) gsub("Chr\\.", "", el$QTL$qtl)) - qtl.dat <- cbind.data.frame(Name = rep(nams, times = sapply(qtl, length))) - qtl.dat$nout <- unlist(qtl) - if (!is.null(chr.in)) { - qnam <- sapply(strsplit(as.character(qtl.dat$nout), "\\."), "[", 1) - print(qnam) - qtl.dat <- qtl.dat[qnam %in% chr.in, ] + dat <- cbind.data.frame(value = unlist(outs)) + dat$nout <- sapply(outs, names) + len <- sapply(outs, length)[1] + dat$Name <- rep(nams, each = len) + chr <- rep(names(cross$geno), times = nmar(cross)) + dat$chr <- factor(rep(chr, length(nams))) + dist <- lapply(cross$geno, function(el) { + tel <- c(100000, diff(el$map)) + names(tel)[1] <- names(el$map)[1] + tel + }) + dist <- cumsum(unlist(dist)) + sp <- unlist(lapply(split(dist, chr), function(el) min(el) + diff(range(el))/2)) + spc <- unlist(lapply(split(dist, chr), function(el) max(el) + 500000)) + dat$dist <- rep(dist, length(nams)) + dat$chr.g <- dat$chr + levels(dat$chr.g) <- c(rep(c("g1","g2"), 10), "g1") + cols <- brewer.pal(3, "Set1")[1:2] + gp <- ggplot(dat, aes(x = dist, y = value)) + facet_wrap(~ Name, ncol = 1, scales = "free_y") + + geom_vline(xintercept = spc, colour = "gray80") + + geom_point(aes(colour = chr.g)) + + scale_y_continuous(expand = c(0.02,0), breaks = seq(0,100, by = 10)) + + scale_x_continuous(breaks = sp, labels = names(cross$geno), expand = c(0.02,0)) + + xlab("") + ylab("Outlier Statistic") + scale_color_manual(values = cols) + + theme(legend.position = "none",axis.text = element_text(size = 10), panel.background = element_blank(), panel.border = element_rect(colour = "gray80", fill = NA, size = 1.1), panel.grid.major.y = element_line(colour = "gray90", size = 1.2), panel.grid.minor.y = element_line(colour = "gray90", size = 0.8), panel.grid.major.x = element_blank(), axis.title = element_text(size = 20), strip.text = element_text(size=10)) + if(annotate){ + qtl <- lapply(mlist, function(el) gsub("Chr\\.", "", el$QTL$qtl)) + qtl.dat <- cbind.data.frame(Name = rep(nams, times = sapply(qtl, length))) + qtl.dat$nout <- unlist(qtl) + if(!is.null(chr.in)){ + qnam <- sapply(strsplit(as.character(qtl.dat$nout), "\\."), "[", 1) + print(qnam) + qtl.dat <- qtl.dat[qnam %in% chr.in,] + } + print(qtl.dat) + subexpr <- paste(dat$Name, dat$nout, sep = ":") %in% paste(qtl.dat$Name, qtl.dat$nout, sep = ":") + ann <- subset(dat, subexpr) + print(ann) + gp + geom_text(data = ann, aes(label = nout), size = 5) } - print(qtl.dat) - subexpr <- paste(dat$Name, dat$nout, sep = ":") %in% paste(qtl.dat$Name, qtl.dat$nout, sep = ":") - ann <- subset(dat, subexpr) - print(ann) - gp + geom_text(data = ann, aes(label = nout), size = 5) - } } diff --git a/R/outliers.R b/R/outliers.R index 1adf634..6fb0729 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -1,72 +1,109 @@ -#' Downweight outliers +#' Handle outliers using standardized residuals +#' +#' Utilities to flag outlying observations per trait using standardized residuals +#' from fitted models (typically `asreml`), with a common cutoff rule. #' -#' @param data -#' @param model -#' @param cutoff +#' `outlier.down()` adds indicator (0/1) columns marking outlying rows, which can +#' be used as covariates or to downweight those observations in a subsequent fit. +#' `outlier.rem()` replaces outlying responses with `NA` (per trait) and returns +#' which traits had any outliers removed. +#' +#' @param data A data frame containing the response columns referenced by `model`. +#' @param model A named list of fitted model objects, one per trait/response. +#' Each element must contain standardized residuals at `x$aom$R[, 2]`. +#' Names of `model` are used to match response columns in `data`. +#' @param cutoff Numeric. Absolute standardized residual threshold used to flag +#' outliers (default `3`). #' #' @return +#' - `outlier.down()` returns the input `data` with additional 0/1 indicator +#' columns appended. New columns are named `".o."`. +#' - `outlier.rem()` returns a list with: +#' \describe{ +#' \item{data}{`data` with outlying responses set to `NA` (per trait).} +#' \item{out}{Named logical vector indicating whether each trait had any +#' outliers removed.} +#' } +#' +#' @details +#' Outliers are identified for each trait using +#' `abs(model[[trait]]$aom$R[, 2]) > cutoff`. +#' +#' `outlier.down()` will append new indicator columns; if columns with the same +#' prefix already exist (e.g. `"Trait.o.*"`), numbering continues from the +#' highest existing suffix. +#' +#' Both functions print the row indices of detected outliers. +#' +#' @name outliers +NULL + +#' Downweight outliers +#' +#' Adds one indicator column per detected outlying observation (per trait). +#' +#' @rdname outliers #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' #' } -outlier.down <- function(data, model, cutoff = 3) { - ss <- names(model) - inds <- 1:nrow(data) - for (i in 1:length(ss)) { - str <- abs(model[[ss[i]]]$aom$R[, 2]) - r <- str > cutoff - wh <- inds[r] - wh <- wh[!is.na(wh)] - if (length(wh)) { - ps <- paste(ss[i], "o", sep = ".") - num <- 0 - if (length(wt <- grep(ps, names(data)))) { - num <- sapply(strsplit(names(data)[wt], "\\."), function(el) el[length(el)]) - num <- as.numeric(num[length(num)]) - } - print(wh) - for (j in 1:length(wh)) { - nam <- paste(ps, j + num, sep = ".") - v <- rep(0, nrow(data)) - v[wh[j]] <- 1 - data[[nam]] <- v - } +outlier.down <- function(data, model, cutoff = 3){ + ss <- names(model) + inds <- 1:nrow(data) + for(i in 1:length(ss)){ + str <- abs(model[[ss[i]]]$aom$R[,2]) + r <- str > cutoff + wh <- inds[r] + wh <- wh[!is.na(wh)] + if(length(wh)){ + ps <- paste(ss[i], "o", sep = ".") + num <- 0 + if(length(wt <- grep(ps, names(data)))){ + num <- sapply(strsplit(names(data)[wt], "\\."), function(el) el[length(el)]) + num <- as.numeric(num[length(num)]) + } + print(wh) + for(j in 1:length(wh)){ + nam <- paste(ps, j + num, sep = ".") + v <- rep(0, nrow(data)) + v[wh[j]] <- 1 + data[[nam]] <- v + } + } } - } - data + data } -#' Outlier removal function +#' Remove outliers #' -#' @param data -#' @param model -#' @param cutoff +#' Sets outlying response values to `NA` (per trait) and reports which traits had +#' outliers removed. #' -#' @return +#' @rdname outliers #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' #' } -outlier.rem <- function(data, model, cutoff = 3) { - ss <- names(model) - inds <- 1:nrow(data) - out <- rep(FALSE, length(model)) - names(out) <- ss - for (i in 1:length(ss)) { - trait <- data[[ss[i]]] - str <- abs(model[[ss[i]]]$aom$R[, 2]) - r <- str > cutoff - wh <- inds[r] - wh <- wh[!is.na(wh)] - if (length(wh)) { - print(wh) - data[[ss[i]]][wh] <- NA - out[i] <- TRUE +outlier.rem <- function(data, model, cutoff = 3){ + ss <- names(model) + inds <- 1:nrow(data) + out <- rep(FALSE, length(model)) + names(out) <- ss + for(i in 1:length(ss)){ + trait <- data[[ss[i]]] + str <- abs(model[[ss[i]]]$aom$R[,2]) + r <- str > cutoff + wh <- inds[r] + wh <- wh[!is.na(wh)] + if(length(wh)){ + print(wh) + data[[ss[i]]][wh] <- NA + out[i] <- TRUE + } } - } - list(data = data, out = out) + list(data = data, out = out) } diff --git a/R/pad.data.R b/R/pad.data.R index 1b497cf..50604a2 100644 --- a/R/pad.data.R +++ b/R/pad.data.R @@ -1,51 +1,84 @@ -#' Title +#' Pad missing row–column positions within groups #' -#' @param data -#' @param pattern -#' @param split -#' @param keep -#' @param fill +#' Ensures that, within each level of a grouping variable (e.g. a block), +#' every combination of a row factor and a column factor exists. +#' Missing row–column positions are added as new rows with `NA`s. +#' +#' @param data A data frame. +#' @param pattern Character string of the form `"Row:Column"` giving the names of +#' the two variables that define the row–column layout. +#' @param split Character. Name of the column used to split `data` into groups +#' (default `"Block"`). Padding is done within each group. +#' @param keep Integer. Column index to copy from existing data into newly added +#' rows (default `4`). The value copied comes from the first rows of the group +#' (recycled as needed). This is typically used to keep a constant identifier +#' (e.g. block label or trial id). +#' @param fill Optional. Integer or character specifying columns to set to `NA` +#' in newly created rows. Default `NULL` (do nothing). +#' +#' @return A data frame like `data`, with additional rows added for missing +#' row–column combinations. A column `add` is added, with values `"old"` for +#' original rows and `"new"` for padded rows. The result is ordered by the +#' row and column variables in `pattern`. +#' +#' @details +#' For each group defined by `split`, the function forms a contingency table +#' of `Row` by `Column`. For any zero-count cell, it adds a new row where: +#' \itemize{ +#' \item `Row` and `Column` are set to the missing combination, +#' \item all other columns are `NA` (except `keep`, which is copied), +#' \item `add` is set to `"new"`. +#' } +#' The row/column factors are then re-leveled to be in increasing numeric order +#' (assuming their levels are numeric strings). #' -#' @return #' @export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' d <- expand.grid( +#' Block = factor(1:2), +#' Row = factor(1:3), +#' Column = factor(1:3) +#' ) +#' +#' # Remove one cell to simulate a missing plot +#' d <- d[-5, ] +#' d # Inspect +#' +#' pad.data(d, pattern = "Row:Column", split = "Block", keep = 1) +#' #' } -pad.data <- function(data, pattern = "Row:Column", split = "Block", keep = 4, fill = NULL) { - pat <- unlist(strsplit(pattern, ":")) - if (!(split %in% names(data))) { - stop("split argument not in data") - } - if (!all(pat %in% names(data))) { - stop("One or more of the variables in pattern argument not in data") - } - spd <- split(data, data[[split]]) - spd <- lapply(spd, function(el, pat) { - temp <- el - temp <- cbind.data.frame(lapply(temp, function(el) { - if (is.factor(el)) factor(el) else el - })) - temp$add <- "old" - tabs <- table(temp[[pat[1]]], temp[[pat[2]]]) - wh <- which(tabs == 0, arr.ind = TRUE) - if (dim(wh)[1] > 0) { - tp <- temp[1:nrow(wh), ] - tp <- cbind.data.frame(lapply(tp, function(el) rep(NA, length(el)))) - tp[, keep] <- temp[1:nrow(wh), keep] - tp[[pat[1]]] <- factor(rownames(tabs)[wh[, 1]]) - tp[[pat[2]]] <- factor(colnames(tabs)[wh[, 2]]) - if (!is.null(fill)) { - tp[, fill] <- NA - } - tp$add <- "new" - temp <- rbind.data.frame(temp, tp) - } - temp - }, pat) - ad <- do.call("rbind.data.frame", spd) - ad[[pat[1]]] <- factor(ad[[pat[1]]], levels = as.character(sort(as.numeric(levels(ad[[pat[1]]]))))) - ad[[pat[2]]] <- factor(ad[[pat[2]]], levels = as.character(sort(as.numeric(levels(ad[[pat[2]]]))))) - ad[order(ad[[pat[1]]], ad[[pat[2]]]), ] +#' + +pad.data <- function(data, pattern = "Row:Column", split = "Block", keep = 4, fill = NULL){ + pat <- unlist(strsplit(pattern, ":")) + if(!(split %in% names(data))) + stop("split argument not in data") + if(!all(pat %in% names(data))) + stop("One or more of the variables in pattern argument not in data") + spd <- split(data, data[[split]]) + spd <- lapply(spd, function(el, pat){ + temp <- el + temp <- cbind.data.frame(lapply(temp, function(el){ if(is.factor(el)) factor(el) else el})) + temp$add <- "old" + tabs <- table(temp[[pat[1]]], temp[[pat[2]]]) + wh <- which(tabs == 0, arr.ind = TRUE) + if(dim(wh)[1] > 0){ + tp <- temp[1:nrow(wh),] + tp <- cbind.data.frame(lapply(tp, function(el) rep(NA, length(el)))) + tp[,keep] <- temp[1:nrow(wh),keep] + tp[[pat[1]]] <- factor(rownames(tabs)[wh[,1]]) + tp[[pat[2]]] <- factor(colnames(tabs)[wh[,2]]) + if(!is.null(fill)) + tp[,fill] <- NA + tp$add <- "new" + temp <- rbind.data.frame(temp, tp) + } + temp + }, pat) + ad <- do.call("rbind.data.frame", spd) + ad[[pat[1]]] <- factor(ad[[pat[1]]], levels = as.character(sort(as.numeric(levels(ad[[pat[1]]]))))) + ad[[pat[2]]] <- factor(ad[[pat[2]]], levels = as.character(sort(as.numeric(levels(ad[[pat[2]]]))))) + ad[order(ad[[pat[1]]],ad[[pat[2]]]),] } diff --git a/R/regresion_functions.R b/R/regresion_functions.R index eb6586e..86bd4e2 100644 --- a/R/regresion_functions.R +++ b/R/regresion_functions.R @@ -15,81 +15,261 @@ #' \dontrun{ #' JULES COMPLETE #' } -randomRegress <- function(model, Env = "TSite:Variety", levs = NULL, sep = "-", ...) { - if (is.null(levs)) { - stop("Treatment levels cannnot be NULL.") - } - evnam <- unlist(strsplit(Env, ":")) - enam <- evnam[1] - vnam <- evnam[2] - rterm <- attr(terms.formula(model$call$random), "term.labels") - rterm <- rterm[grep(paste(evnam, collapse = "|"), rterm)] - print(rterm) - if (substring(rterm, 1, 2) == "fa") { - sumfa <- fa.asreml(model, trunc.char = NULL) - pvals <- sumfa$blups[[rterm]]$blups[, 1:3] - Gmat <- sumfa$gammas[[rterm]]$Gmat - } - else { - pred <- predict(model, classify = Env, only = Env, ...) - Gmat <- summary(model, vparameters = TRUE)$vparameters[[Env]] - pvals <- pred$pvals - names(pvals)[3] <- "blup" - } - tsnams <- dimnames(Gmat)[[2]] - if (length(grep(sep, tsnams))) { - st <- strsplit(tsnams, split = sep) - tnam <- sapply(st, function(el) el[1]) - snam <- sapply(st, function(el) el[2]) - if (!all(levs %in% c(snam, tnam))) { - stop("Treatment levels do not exist in ", enam) +randomRegress <- function(model, Env = "TSite:Variety", levs = NULL, sep = "-", pev = TRUE, ...){ + if(is.null(levs)) + stop("Treatment levels cannnot be NULL.") + evnam <- unlist(strsplit(Env, ":")) + enam <- evnam[1]; vnam <- evnam[2] + penv <- gsub(":",".*", Env) + rterm <- attr(terms.formula(model$call$random), "term.labels") + rterm <- rterm[grep(penv, rterm)] + print(rterm) + if(substring(rterm, 1, 2) == "fa"){ + sumfa <- ASExtras4::fa.asreml(model, trunc.char = NULL) + pvals <- sumfa$blups[[rterm]]$blups[,1:3] + Gmat <- sumfa$gammas[[rterm]]$Gmat } - if (all(levs %in% snam)) { - tnam <- snam - snam <- sapply(st, function(el) el[1]) + else { + pred <- predict(model, classify = Env, only = Env, vcov = TRUE, ...) + Gmat <- summary(model, vparameters = TRUE)$vparameters[[Env]] + pvals <- pred$pvals + names(pvals)[3] <- "blup" } - } else { - tnam <- tsnams - snam <- rep("Single", length(tnam)) - } - usnams <- unique(snam) - tmat <- diag(nrow(Gmat)) - beta <- sigr <- c() - blist <- list() - for (i in 1:length(usnams)) { - inds <- (1:length(snam))[snam %in% usnams[i]] - names(inds) <- tnam[inds] - whl <- (1:2)[levs %in% names(inds)] - if (length(whl) == 2) { - tind <- inds[levs] - mat <- Gmat[tind, tind] - beta[i] <- mat[1, 2] / mat[1, 1] - rho <- mat[1, 2] / sqrt(mat[1, 1] * mat[2, 2]) - sigr[i] <- (mat[2, 2] * (1 - rho^2)) - tmat[tind[2], tind[1]] <- -beta[i] - blow <- pvals$blup[pvals[[enam]] %in% tsnams[tind[1]]] - bhigh <- pvals$blup[pvals[[enam]] %in% tsnams[tind[2]]] - bresp <- bhigh - beta[i] * blow - blist[[i]] <- cbind.data.frame(blow, bhigh, bresp) + tsnams <- dimnames(Gmat)[[2]] + if(length(grep(sep, tsnams))){ + st <- strsplit(tsnams, split = sep) + tnam <- sapply(st, function(el) el[1]) + snam <- sapply(st, function(el) el[2]) + if(!all(levs %in% c(snam, tnam))) + stop("Treatment levels do not exist in ", enam) + if(all(levs %in% snam)){ + tnam <- snam + snam <- sapply(st, function(el) el[1]) + } } else { - slevs <- levs[whl] - tind <- inds[slevs] - if (whl == 1) { - blist[[i]] <- cbind.data.frame(blow = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bhigh = NA, bresp = NA) - } else { - blist[[i]] <- cbind.data.frame(blow = NA, bhigh = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bresp = NA) - } + tnam <- tsnams + snam <- rep("Single", length(tnam)) } - } - TGmat <- tmat %*% Gmat %*% t(tmat) - tsnams <- gsub(levs[2], "resp", tsnams) - tsnams <- gsub(levs[1], "eff", tsnams) - dimnames(TGmat) <- list(tsnams, tsnams) - blups <- do.call("rbind.data.frame", blist) - names(blups)[1:3] <- c(levs, "resp") - glev <- unique(as.character(pvals[[vnam]])) - blups <- cbind.data.frame(Site = rep(usnams, each = length(glev)), Variety = rep(glev, length(usnams)), blups) - list(blups = blups, TGmat = TGmat, Gmat = Gmat, beta = beta, sigr = sigr, tmat = tmat) + usnams <- unique(snam) + tmat <- diag(nrow(Gmat)) + beta <- sigr <- c() + blist <- list() + for(i in 1:length(usnams)){ + inds <- (1:length(snam))[snam %in% usnams[i]] + names(inds) <- tnam[inds] + whl <- (1:2)[levs %in% names(inds)] + if(length(whl) == 2){ + tind <- inds[levs] + mat <- Gmat[tind, tind] + beta[i] <- mat[1,2]/mat[1,1] + rho <- mat[1,2]/sqrt(mat[1,1]*mat[2,2]) + sigr[i] <- (mat[2,2]*(1 - rho^2)) + tmat[tind[2], tind[1]] <- - beta[i] + imat <- diag(2) + imat[2,1] <- - beta[i] + blow <- pvals$blup[pvals[[enam]] %in% tsnams[tind[1]]] + bhigh <- pvals$blup[pvals[[enam]] %in% tsnams[tind[2]]] + bresp <- bhigh - beta[i]*blow + blist[[i]] <- cbind.data.frame(blow, bhigh, bresp) + lowi <- (1:nrow(pvals))[pvals[[enam]] %in% tsnams[tind[1]]] + highi <- (1:nrow(pvals))[pvals[[enam]] %in% tsnams[tind[2]]] + pevm <- kronecker(imat, diag(length(blow))) %*% as.matrix(pred$vcov[c(lowi,highi),c(lowi,highi)]) %*% kronecker(t(imat), diag(length(blow))) + if(!pev) + pevm <- kronecker(diag(c(mat[1,1],sigr[i])), diag(length(blow))) - pevm + bvar <- pevm[(length(blow) + 1):ncol(pevm),(length(blow) + 1):ncol(pevm)] + sed <- apply(combn(diag(bvar), 2), 2, sum) - 2*bvar[lower.tri(bvar)] + sed[sed < 0] <- NA + blist[[i]]$HSD <- (mean(sqrt(sed), na.rm = TRUE)/sqrt(2))*qtukey(0.95, length(blow), df = length(blow) - 2) + } else { + slevs <- levs[whl] + tind <- inds[slevs] + if(whl == 1) + blist[[i]] <- cbind.data.frame(blow = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bhigh = NA, bresp = NA) + else blist[[i]] <- cbind.data.frame(blow = NA, bhigh = pvals$blup[pvals[[enam]] %in% tsnams[tind]], bresp = NA) + } + } + TGmat <- tmat %*% Gmat %*% t(tmat) + tsnams <- gsub(levs[2], "resp", tsnams) + tsnams <- gsub(levs[1], "eff", tsnams) + dimnames(TGmat) <- list(tsnams, tsnams) + blups <- do.call("rbind.data.frame", blist) + names(blups)[1:3] <- c(levs, "resp") + glev <- unique(as.character(pvals[[vnam]])) + blups <- cbind.data.frame(Site = rep(usnams, each = length(glev)), Variety = rep(glev, length(usnams)), blups) + list(blups = blups, TGmat = TGmat, Gmat = Gmat, beta = beta, sigr = sigr, tmat = tmat) +} + +## BLUEs regression + +fixedRegress <- function(model, term = "Treatment:Genotype", by = NULL, levs = NULL, simple = TRUE){ + pterm <- term + if(is.null(levs)) + stop("Treatment levels cannnot be NULL.") + term <- unlist(strsplit(term, ":")) + if(length(term) < 2) + stop("Argument \"term\" needs at least two variables.") + pred <- predict(model, classify = pterm, vcov = TRUE) + whna <- !is.na(pred$pvals$predicted.value) + pv <- pred$pvals[whna,] + vc <- as.matrix(pred$vcov)[whna,whna] + wht <- unlist(sapply(pv[,term], function(el, levs) all(levs %in% levels(el)), levs)) + if(!any(wht)) + stop("Some levels specified in \"levs\" do not exist in term variables.") + tnam <- term[wht] + if(!is.null(by)){ + bys <- unlist(strsplit(by, ":")) + if(!all(bys %in% term)) + stop("Some variables in argument \"by\" are not in \"term\".") + if(tnam %in% bys) + stop("Levels specified in \"levs\" cannot be in \"by\" variable.") + rterm <- term[!(term %in% c(tnam, bys))] + if(!length(rterm)) + stop("There are no variables to form regression between specified levels.") + if(length(rterm) > 1) + pv[["regress"]] <- apply(pv[,rterm], 1, function(el) paste(el, collapse = ":")) + else pv[["regress"]] <- pv[[rterm]] + if(length(bys) > 1) + pv[[by]] <- apply(pv[,bys], 1, function(el) paste(el, collapse = ":")) + uby <- as.character(pv[[by]]) + um <- unique(uby) + } else { + uby <- rep(tnam, nrow(pv)) + um <- unique(uby) + rterm <- term[!(term %in% tnam)] + pv[["regress"]] <- pv[[rterm]] + } + resp.list <- list() + for(i in 1:length(um)){ + inds <- uby %in% um[i] + pvt <- pv[inds,] + pv1 <- pvt[wh1 <- pvt[[tnam]] %in% levs[1],] + pv2 <- pvt[wh2 <- pvt[[tnam]] %in% levs[2],] + whr <- intersect(pv1[["regress"]], pv2[["regress"]]) + if(length(whr) > 5){ + wt1 <- pv1[["regress"]] %in% whr + wt2 <- pv2[["regress"]] %in% whr + pcont <- pv1$predicted.value[wt1] + ptreat <- pv2$predicted.value[wt2] + regt <- pv2[["regress"]][wt2] + if(!simple){ + vct <- vc[inds, inds] + s22 <- vct[wh2,wh2] + s22 <- s22[wt2,wt2] + s11 <- vct[wh1,wh1] + s11 <- s11[wt1,wt1] + s21 <- vct[wh2,wh1] + s21 <- s21[wt2,wt1] + resp <- ptreat - s21 %*% solve(s11) %*% pcont + resp.var <- s22 - s21 %*% solve(s11) %*% t(s21) + rdf <- model$nedf + } else { + lmr <- lm(ptreat ~ pcont) + resp <- lmr$residuals + xm <- model.matrix( ~ pcont) + vmat <- (diag(length(ptreat)) - xm %*% solve(t(xm)%*%xm) %*% t(xm)) + resp.var <- ((vmat) %*% t(vmat))*(summary(lmr)$sigma^2) + rdf <- lmr$df.residual + } + std.error <- sqrt(diag(resp.var)) + sed <- sqrt(apply(combn(diag(resp.var), 2), 2, sum) - 2*resp.var[lower.tri(resp.var)]) + respd <- cbind.data.frame(Split = um[i], Regress.Var = regt) + respd[[levs[1]]] <- pcont + respd[[levs[2]]] <- ptreat + respd$reponse.index <- resp + respd$std.error <- std.error + respd$HSD <- (mean(sed)/sqrt(2))*qtukey(0.95, length(ptreat), df = rdf) + respd$sed <- mean(sed) + resp.list[[i]] <- respd + } else warning("Some treatment combinations in ", um[i]," have less than 5 matching observations and have been omitted.\n") + } + resp.list <- resp.list[!sapply(resp.list, is.null)] + do.call("rbind.data.frame", resp.list) +} + +compare <- function(model, term = "Treatment:Genotype", by = NULL, omit.string = NULL, type = "HSD", pev = TRUE, fw.method = "none", ...){ + pred <- predict(model, classify = term, vcov = TRUE, ...) + terms <- unlist(strsplit(term, ":")) + pv <- pred$pvals + inds <- !is.na(pv$predicted.value) + if(!pev & all(terms %in% all.vars(model$call$random))){ + varm <- summary(model, vparameters = TRUE)$vparameters[[term]] + if(length(terms) > 1) + len <- table(pv[,1])[1] + else len <- nrow(pv) + vara <- kronecker(varm, diag(len)) - pred$vcov + vara[inds, inds] + } else vara <- pred$vcov[inds, inds] + pv <- pv[inds,] + section <- FALSE + if(!is.null(by)){ + bys <- unlist(strsplit(by, ":")) + if(all(terms %in% bys)) + stop("Argument \"by\" indicates no multiple comparisons are being made.") + if(!all(bys %in% terms)) + stop("Some terms in argument \"by\" are not in \"term\".") + if(length(bys) > 1) + pv[[by]] <- apply(pv[,bys], 1, function(el) paste(el, collapse = ":")) + } else{ + by <- term + pv[[by]] <- by + } + if(!is.null(omit.string)){ + oind <- grep(omit.string, as.character(pv[[gnam]])) + if(length(oind)){ + pv <- pv[-oind,] + sed <- sed[-oind,-oind] + } + } + sst <- as.character(pv[[by]]) + um <- unique(sst) + if(type %in% c("HSD","LSD")){ + tsd <- avsed <- c() + for(k in 1:length(um)){ + sinds <- sst %in% um[k] + svar <- vara[sinds, sinds] + avsed[k] <- sqrt(mean(apply(combn(diag(svar), 2), 2, sum) - 2*svar[lower.tri(svar)])) + if(type == "HSD") + tsd[k] <- (avsed[k]/sqrt(2))*qtukey(0.95, length(sinds), model$nedf) + else tsd[k] <- avsed[k]*qt(0.025, df = model$nedf, lower.tail = FALSE) + } + pv <- cbind.data.frame(pv[,1:(length(terms) + 2)]) + pv[[type]] <- rep(tsd, times = table(sst)) + pv[["sed"]] <- rep(avsed, times = table(sst)) + } + else if(type %in% "PVAL"){ + pvs <- split(pv, pv[[by]]) + yvar <- deparse(model$call$fixed[[2]]) + xvar <- labels(terms(as.formula(model$call$fixed))) + fix.form <- as.formula(paste(yvar, " ~ ", xvar[length(xvar)], " - 1", sep = "")) + model <- update(model, fixed. = fix.form, Cfixed = TRUE) + coefs <- model$coefficients$fixed + cinds <- grep(paste(terms, collapse = ".*"), rownames(coefs)) + coefs <- coefs[cinds,,drop = FALSE] + for(k in 1:length(um)){ + umt <- paste(strsplit(um[k], ":")[[1]], collapse = ".*") + sind <- cinds[grep(umt, rownames(coefs))] + scf <- coefs[grep(umt, rownames(coefs)),] + sna <- scf == 0 + aind <- sind[!sna] + pvt <- pvs[[k]] + cb <- t(combn(nrow(pvt), 2)) + mat <- matrix(0, nrow = nrow(cb), ncol = nrow(pvt)) + mat[cbind(1:nrow(mat), cb[,1])] <- 1 + mat[cbind(1:nrow(mat), cb[,2])] <- -1 + cc <- list(coef = aind, type = "con", comp = mat) + wt <- waldTest(model, list(cc))$Contrasts + pval <- wt$"P-Value" + add <- matrix(0, nrow = nrow(pvt), ncol = nrow(pvt)) + add[lower.tri(add)] <- stats::p.adjust(pval, method = fw.method) + add <- add + t(add) + # add <- add[ord, ord] + dimnames(add)[[2]] <- apply(pvt[,terms], 1, function(el) paste(el, collapse = ":")) + paste(as.character(pvt[[terms[[1]]]]), as.character(pvt[[terms[2]]]), sep = ":") + pvs[[k]] <- cbind.data.frame(pvs[[k]][,1:(length(terms) + 2)], add) + } + pv <- pvs + } else stop("Please use one of the allowable types, \"HSD\",\"LSD\",\"PVAL\"") + pv } ## BLUEs regression diff --git a/R/reinstall_packages.R b/R/reinstall_packages.R index f092086..1a1c082 100644 --- a/R/reinstall_packages.R +++ b/R/reinstall_packages.R @@ -3,7 +3,6 @@ #' @param location Location to check for installed packages and to reinstall to. Defaults to the first option in `.libPaths()`. #' @param source Logical. Install packages from source that have later source versions than binaries? This should usually be FALSE. #' -#' @return #' @export #' reinstall_packages <- function(location = .libPaths()[1], source = FALSE) { diff --git a/R/themes.R b/R/themes.R index 80e02c6..973442f 100644 --- a/R/themes.R +++ b/R/themes.R @@ -1,113 +1,151 @@ -#' Custom `ggplot2` themes +#' Custom ggplot2 themes #' -#' @description Themes that provide some custom styling for `ggplot2` plots. +#' Minimal theme helpers used across the package. +#' +#' These functions return `ggplot2::theme()` objects that can be added to a plot +#' with `+`. +#' +#' @param base_size Base font size (in points). +#' @param base_family Base font family. +#' +#' @return A `ggplot2` theme object. +#' +#' @details +#' - `theme_design()` removes most grid/axes clutter and hides the legend. +#' - `theme_design_heat()` is intended for heatmaps (keeps legend, rotates x labels). +#' - `theme_scatter()` uses a light grid and a panel border; larger text defaults. +#' - `theme_barplot()` places the legend at the bottom and rotates x labels. #' -#' @param base_size Plot font size, given in pts. -#' @param base_family Font family used for the plot. #' @importFrom ggplot2 theme theme_grey %+replace% element_blank element_text element_line element_rect #' #' @name themes NULL +#' Design theme (no legend) +#' +#' A clean theme for general plots with the legend removed. +#' #' @rdname themes -#' @return #' @export #' #' @examples #' \dontrun{ -#' SAM COMPLETE +#' library(ggplot2) +#' ggplot(mtcars, aes(wt, mpg)) + +#' geom_point() + +#' theme_design() #' } -theme_design <- function(base_size = 11, base_family = "") { - ggplot2::theme_grey(base_size = base_size, base_family = base_family) %+replace% - theme( - legend.position = "none", - panel.grid.minor = element_blank(), - panel.grid.major = element_blank(), - panel.background = element_blank(), - axis.line = element_blank(), - axis.ticks = element_blank(), - axis.text.y = element_text(), - axis.text.x = element_text(angle = 45), - axis.title = element_text(), - strip.text = element_text() - ) +theme_design <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + legend.position = "none", + panel.grid.minor = element_blank(), + panel.grid.major = element_blank(), + panel.background = element_blank(), + axis.line = element_blank(), + axis.ticks = element_blank(), + axis.text.y = element_text(), + axis.text.x = element_text(angle = 0), + axis.title = element_text(), + strip.text = element_text(size = 14) + ) } +#' Design theme for heatmaps +#' +#' A design theme variant intended for heatmaps (legend kept; x labels rotated). +#' #' @rdname themes -#' @return #' @export #' #' @examples #' \dontrun{ -#' SAM COMPLETE +#' library(ggplot2) +#' df <- expand.grid(x = letters[1:5], y = letters[1:5]) +#' df$z <- seq_len(nrow(df)) +#' ggplot(df, aes(x, y, fill = z)) + +#' geom_tile() + +#' theme_design_heat() #' } -theme_design_heat <- function(base_size = 11, base_family = "") { - theme_grey(base_size = base_size, base_family = base_family) %+replace% - theme( - # legend.position = "none", - strip.text.x = element_text(size = 14), # margin = margin(0.15,0,0.15,0, "cm")), - panel.grid.minor = element_blank(), - panel.grid.major = element_blank(), - panel.background = element_blank(), - axis.line = element_blank(), - axis.ticks = element_blank(), - axis.text = element_text(), - axis.title = element_text() - ) +theme_design_heat <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + # legend.position = "none", + strip.text.x = element_text(size = 14), #margin = margin(0.15,0,0.15,0, "cm")), + panel.grid.minor = element_blank(), + panel.grid.major = element_blank(), + panel.background = element_blank(), + axis.line = element_blank(), + axis.ticks = element_blank(), + axis.text.x = element_text(angle = 45), + axis.title = element_text() + ) } +#' Scatter plot theme +#' +#' Theme tuned for scatter plots (light grid and a panel border). +#' #' @rdname themes -#' @return #' @export #' #' @examples #' \dontrun{ -#' SAM COMPLETE +#' library(ggplot2) +#' ggplot(mtcars, aes(wt, mpg)) + +#' geom_point() + +#' theme_scatter() #' } -theme_scatter <- function(base_size = 11, base_family = "") { - theme_grey(base_size = base_size, base_family = base_family) %+replace% - theme( - legend.position = "none", - legend.text = element_text(size = 26), - legend.title = element_text(size = 26), - panel.grid.minor = element_line(colour = "grey80"), - panel.grid.major = element_line(colour = "grey80"), - panel.background = element_blank(), - panel.border = element_rect(fill = NA, size = 1.1, colour = "grey80"), - axis.text.x = element_text(size = 20), - axis.text.y = element_text(size = 20), - axis.title.y = element_text(size = 20, angle = 90), - axis.title.x = element_text(size = 20), - # axis.title = element_blank(), - strip.text = element_text(size = 16) - ) +theme_scatter <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + legend.position = "none", + legend.text = element_text(size = 26), + legend.title = element_text(size = 26), + panel.grid.minor = element_line(colour = "grey80"), + panel.grid.major = element_line(colour = "grey80"), + panel.background = element_blank(), + panel.border = element_rect(fill = NA, size = 1.1, colour = "grey80"), + axis.text.x = element_text(size = 14), + axis.text.y = element_text(size = 14), + axis.title.y = element_text(size = 20, angle = 90), + axis.title.x = element_text(size = 20), + # axis.title = element_blank(), + strip.text = element_text(size = 16) + ) } +#' Bar plot theme +#' +#' Theme tuned for bar plots (legend at bottom; x labels rotated). +#' #' @rdname themes -#' @return #' @export #' #' @examples #' \dontrun{ -#' SAM COMPLETE +#' library(ggplot2) +#' ggplot(mtcars, aes(factor(cyl), fill = factor(gear))) + +#' geom_bar() + +#' theme_barplot() #' } -theme_barplot <- function(base_size = 11, base_family = "") { - theme_grey(base_size = base_size, base_family = base_family) %+replace% - theme( - legend.title = element_blank(), - legend.text = element_text(size = 16), - # legend.position = "none", - panel.grid.minor = element_line(colour = "grey80"), - panel.grid.major = element_line(colour = "grey80"), - panel.background = element_blank(), - panel.border = element_rect(fill = NA, size = 1.1, colour = "grey80"), - axis.text.x = element_text(angle = 25, hjust = 1, vjust = 1, size = 12), - axis.text.y = element_text(size = 12), - axis.title = element_blank(), - strip.text = element_text(size = 15, margin = margin(0.17, 0, 0.17, 0, "cm")) - ) +theme_barplot <- function (base_size = 11, base_family = "") { + theme_grey(base_size = base_size, base_family = base_family) %+replace% + theme( + legend.position = "bottom", + legend.title = element_blank(), + legend.text = element_text(size = 16), + panel.grid.minor = element_line(colour = "grey80"), + panel.grid.major = element_line(colour = "grey80"), + panel.background = element_blank(), + panel.border = element_rect(fill = NA, size = 1.1, colour = "grey80"), + axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 6), + axis.text.y = element_text(size = 12, hjust = 1), + axis.title = element_blank(), + strip.text = element_text(size = 14) + ) } diff --git a/R/wald.R b/R/wald.R index 32e5c33..4ffecf0 100644 --- a/R/wald.R +++ b/R/wald.R @@ -59,190 +59,165 @@ #' #' # TODO example2 (joint zero test): #' } -wald.test.asreml <- function(object, cc, keep.fac = TRUE) { - if (oldClass(object) != "asreml") { - stop("Requires an object of class asreml\n") - } - if (is.null(object$Cfixed)) { - warning("Requires C matrix from model object. Refitting test model with argument \"Cfixed = T\"\n") - asreml.options(Cfixed = TRUE) - object <- update(object) - } - vrb <- as.matrix(object$Cfixed) - tau <- c(object$coefficients$fixed) - names(tau) <- rownames(object$coefficients$fixed) - nc <- length(tau) - sigma2 <- object$sigma2 - vrb <- vrb / sigma2 - ccnams <- names(tau) - zdf <- cdf <- NULL - cc <- lapply(cc, function(el, ccnams) { - if (!all(names(el) %in% c("coef", "type", "comp", "group"))) { - stop("Inappropriately named argument for comparison object.") +wald.test.asreml <- function(object, cc, keep.fac = TRUE) +{ + if(oldClass(object) != "asreml") + stop("Requires an object of class asreml\n") + if(is.null(object$Cfixed)) { + warning("Requires C matrix from model object. Refitting test model with argument \"Cfixed = T\"\n") + asreml.options(Cfixed = TRUE) + object <- update(object) } - if (is.numeric(el$coef)) { - if (max(el$coef) > length(ccnams)) { - stop("coefficient subscript out of bounds") - } - names(el$coef) <- ccnams[el$coef] - } - else { - if (any(is.na(pmatch(el$coef, ccnams)))) { - stop("Names in contrast do not match the names of coefficients of object") - } - temp <- pmatch(el$coef, ccnams) - names(temp) <- el$coef - el$coef <- temp - } - el - }, ccnams) - ## split contrasts and other available tests - ctype <- unlist(lapply(cc, function(el) el$type)) - if (!all(ctype %in% c("con", "zero"))) { - stop("Contrast types must be either \"con\" for treatment comparisons or \"zero\" for testing zero equality") - } - cons <- cc[ctype %in% "con"] - zero <- cc[ctype %in% "zero"] - cse <- ctau <- zwtest <- cwtest <- zpval <- c() - if (length(cons)) { - CRows <- lapply(cons, function(el, nc) { - if (length(el) < 3) { - con <- contr.helmert(length(el$coef))[, (length(el$coef) - 1)] - names(con) <- cnam <- names(el$coef) - cat("Warning: default contrast being taken for", cnam, "is", con, "\n") - row <- rep(0, nc) - row[el$coef] <- con - row - } - else { - if (is.matrix(el$comp)) { - if (length(el$coef) != ncol(el$comp)) { - stop("Length of contrast does not match the number of specified coefficients") - } - cons <- split(el$comp, 1:nrow(el$comp)) - rows <- lapply(cons, function(ell, first = el$coef, nc) { - row <- rep(0, nc) - row[first] <- ell - row - }, first = el$coef, nc) - rows <- unlist(rows, use.names = F) - matrix(rows, nrow = nrow(el$comp), byrow = T) + vrb <- as.matrix(object$Cfixed) + tau <- c(object$coefficients$fixed) + names(tau) <- rownames(object$coefficients$fixed) + nc <- length(tau) + sigma2 <- object$sigma2 + vrb <- vrb/sigma2 + ccnams <- names(tau) + zdf <- cdf <- NULL + cc <- lapply(cc, function(el, ccnams){ + if(!all(names(el) %in% c("coef","type","comp","group"))) + stop("Inappropriately named argument for comparison object.") + if(is.numeric(el$coef)) { + if(max(el$coef) > length(ccnams)) + stop("coefficient subscript out of bounds") + names(el$coef) <- ccnams[el$coef] } else { - if (length(el$coef) != length(el$comp)) { - stop("Length of contrast does not match the number of specified coefficients") - } - row <- rep(0, nc) - row[el$coef] <- el$comp - row + if(any(is.na(pmatch(el$coef, ccnams)))) + stop("Names in contrast do not match the names of coefficients of object") + temp <- pmatch(el$coef, ccnams) + names(temp) <- el$coef + el$coef <- temp } - } - }, nc) - Cmat <- do.call("rbind", CRows) - if (!keep.fac) { - ccnams <- substring(ccnams, regexpr("\\_", ccnams) + 1, nchar(ccnams)) - } - cnam <- lapply(split(Cmat, 1:nrow(Cmat)), function(el, ccnams) { - namr <- ccnams[ifelse(el < 0, T, F)] - naml <- ccnams[ifelse(el > 0, T, F)] - c(paste(naml, collapse = ":"), paste(namr, collapse = ":")) + el }, ccnams) - Cnam <- do.call("rbind", cnam) - gnams <- lapply(cons, function(el) { - if (!is.null(el$group)) { - if (!any(names(el$group) %in% c("left", "right"))) { - stop("group names must be \"left\" and \"right\".") - } - if (is.null(el$group$left)) { - if (is.matrix(el$comp)) { - el$group$left <- rep(NA, nrow(el$comp)) - } else { - el$group$left <- NA - } - } else { - if (is.matrix(el$comp)) { - if (length(el$group$left) == 1) { - el$group$left <- rep(el$group$left, nrow(el$comp)) + ## split contrasts and other available tests + ctype <- unlist(lapply(cc, function(el) el$type)) + if(!all(ctype %in% c("con","zero"))) + stop("Contrast types must be either \"con\" for treatment comparisons or \"zero\" for testing zero equality") + cons <- cc[ctype %in% "con"] + zero <- cc[ctype %in% "zero"] + cse <- ctau <- zwtest <- cwtest <- zpval <- c() + if(length(cons)) { + CRows <- lapply(cons, function(el, nc){ + if(length(el) < 3){ + con <- contr.helmert(length(el$coef))[, (length(el$coef) - 1)] + names(con) <- cnam <- names(el$coef) + cat("Warning: default contrast being taken for", cnam, "is", con, "\n") + row <- rep(0, nc) + row[el$coef] <- con + row } - if (length(el$group$left) != nrow(el$comp)) { - stop("No. of group names do not match the number of comparisons in object") + else { + if(is.matrix(el$comp)) { + if(length(el$coef) != ncol(el$comp)) + stop("Length of contrast does not match the number of specified coefficients") + cons <- split(el$comp, 1:nrow(el$comp)) + rows <- lapply(cons, function(ell, first = el$coef, nc){ + row <- rep(0, nc) + row[first] <- ell + row + }, first = el$coef, nc) + rows <- unlist(rows, use.names = F) + matrix(rows, nrow = nrow(el$comp), byrow = T) + } + else { + if(length(el$coef) != length(el$comp)) + stop("Length of contrast does not match the number of specified coefficients") + row <- rep(0, nc) + row[el$coef] <- el$comp + row + } } - } - } - if (is.null(el$group$right)) { - if (is.matrix(el$comp)) { - el$group$right <- rep(NA, nrow(el$comp)) - } else { - el$group$right <- NA - } - } else { - if (is.matrix(el$comp)) { - if (length(el$group$right) == 1) { - el$group$right <- rep(el$group$right, nrow(el$comp)) + }, nc) + Cmat <- do.call("rbind", CRows) + if(!keep.fac) + ccnams <- substring(ccnams, regexpr("\\_", ccnams) + 1, nchar(ccnams)) + cnam <- lapply(split(Cmat, 1:nrow(Cmat)), function(el, ccnams){ + namr <- ccnams[ifelse(el < 0, T, F)] + naml <- ccnams[ifelse(el > 0, T, F)] + c(paste(naml, collapse = ":"), paste(namr, collapse = ":")) + }, ccnams) + Cnam <- do.call("rbind", cnam) + gnams <- lapply(cons, function(el){ + if(!is.null(el$group)){ + if(!any(names(el$group) %in% c("left","right"))) + stop("group names must be \"left\" and \"right\".") + if(is.null(el$group$left)){ + if(is.matrix(el$comp)) + el$group$left <- rep(NA, nrow(el$comp)) + else el$group$left <- NA + } else { + if(is.matrix(el$comp)){ + if(length(el$group$left) == 1) + el$group$left <- rep(el$group$left, nrow(el$comp)) + if(length(el$group$left) != nrow(el$comp)) + stop("No. of group names do not match the number of comparisons in object") + } + } + if(is.null(el$group$right)){ + if(is.matrix(el$comp)) + el$group$right <- rep(NA, nrow(el$comp)) + else el$group$right <- NA + } else { + if(is.matrix(el$comp)) { + if(length(el$group$right) == 1) + el$group$right <- rep(el$group$right, nrow(el$comp)) + if(length(el$group$right) != nrow(el$comp)) + stop("No. of group names do not match the number of comparisons in object") + } + } + } else { + if(is.matrix(el$comp)) + el$group$left <- el$group$right <- rep(NA, nrow(el$comp)) + else el$group$left <- el$group$right <- NA } - if (length(el$group$right) != nrow(el$comp)) { - stop("No. of group names do not match the number of comparisons in object") - } - } - } - } else { - if (is.matrix(el$comp)) { - el$group$left <- el$group$right <- rep(NA, nrow(el$comp)) - } else { - el$group$left <- el$group$right <- NA + cbind(el$group$left, el$group$right) + }) + Gnam <- do.call("rbind", gnams) + Cnam[!is.na(Gnam[,1]), 1] <- Gnam[!is.na(Gnam[,1]),1] + Cnam[!is.na(Gnam[,2]), 2] <- Gnam[!is.na(Gnam[,2]),2] + for(i in 1:nrow(Cmat)) { + varmat <- sum(Cmat[i, ]*crossprod(vrb, t(Cmat)[, i])) + cse[i] <- sqrt(varmat * sigma2) + ctau[i] <- sum(Cmat[i, ]*tau) + cwtest[i] <- (ctau[i]/cse[i])^2 } - } - cbind(el$group$left, el$group$right) - }) - Gnam <- do.call("rbind", gnams) - Cnam[!is.na(Gnam[, 1]), 1] <- Gnam[!is.na(Gnam[, 1]), 1] - Cnam[!is.na(Gnam[, 2]), 2] <- Gnam[!is.na(Gnam[, 2]), 2] - for (i in 1:nrow(Cmat)) { - varmat <- sum(Cmat[i, ] * crossprod(vrb, t(Cmat)[, i])) - cse[i] <- sqrt(varmat * sigma2) - ctau[i] <- sum(Cmat[i, ] * tau) - cwtest[i] <- (ctau[i] / cse[i])^2 + cdf <- data.frame(wald = round(cwtest, 6), pval = 1 - pchisq(cwtest, 1), + coef = round(ctau, 6), se = round(cse, 6)) + attr(cdf, "names") <- c("Wald Statistic", "P-Value", "Cont. Coef.", "Std. Error") + attr(cdf, "row.names") <- paste(Cnam[,1], Cnam[,2], sep = " vs ") + oldClass(cdf) <- "data.frame" } - cdf <- data.frame( - wald = round(cwtest, 6), pval = round(1 - pchisq(cwtest, 1), 6), - coef = round(ctau, 6), se = round(cse, 6) - ) - cat("\nWald Tests: Comparisons\n\n") - attr(cdf, "names") <- c("Wald Statistic", "P-Value", "Cont. Coef.", "Std. Error") - attr(cdf, "row.names") <- paste(Cnam[, 1], Cnam[, 2], sep = " vs ") - oldClass(cdf) <- "data.frame" - } - if (length(zero)) { - ZRows <- lapply(zero, function(el, nc) { - rows <- rep(rep(0, nc), length(el$coef)) - dum <- seq(0, (length(el$coef) - 1) * nc, by = nc) - rows[el$coef + dum] <- 1 - matrix(rows, nrow = length(el$coef), byrow = T) - }, nc) - znam <- unlist(lapply(zero, function(el, ccnams) { - if (is.null(el$group)) { - paste(ccnams[el$coef], collapse = ":") - } else { - el$group - } - }, ccnams)) - if (any(table(znam) > 1)) { - stop("Duplicate names in group structures for zero equality tests.") - } - for (i in 1:length(ZRows)) { - varmat <- ZRows[[i]] %*% crossprod(vrb, t(ZRows[[i]])) - Ctau <- ZRows[[i]] %*% tau - zwtest[i] <- sum(Ctau * crossprod(solve(varmat), Ctau)) / sigma2 - zpval[i] <- 1 - pchisq(zwtest[i], nrow(ZRows[[i]])) + if(length(zero)) { + ZRows <- lapply(zero, function(el, nc){ + rows <- rep(rep(0, nc), length(el$coef)) + dum <- seq(0, (length(el$coef) - 1) * nc, by = nc) + rows[el$coef + dum] <- 1 + matrix(rows, nrow = length(el$coef), byrow = T) + }, nc) + znam <- unlist(lapply(zero, function(el, ccnams) { + if(is.null(el$group)) + paste(ccnams[el$coef], collapse = ":") + else el$group + }, ccnams)) + if(any(table(znam) > 1)) + stop("Duplicate names in group structures for zero equality tests.") + for(i in 1:length(ZRows)) { + varmat <- ZRows[[i]] %*% crossprod(vrb, t(ZRows[[i]])) + Ctau <- ZRows[[i]] %*% tau + zwtest[i] <- sum(Ctau*crossprod(solve(varmat), Ctau))/sigma2 + zpval[i] <- 1 - pchisq(zwtest[i], nrow(ZRows[[i]])) + } + zdf <- data.frame(wald = round(zwtest, 6), pval = zpval) + attr(zdf, "names") <- c("Wald Statistic", "P-Value") + attr(zdf, "row.names") <- znam + oldClass(zdf) <- "data.frame" } - zdf <- data.frame(wald = round(zwtest, 6), pval = round(zpval, 6)) - cat("\nWald Tests: Zero Equality\n\n") - attr(zdf, "names") <- c("Wald Statistic", "P-Value") - attr(zdf, "row.names") <- znam - oldClass(zdf) <- "data.frame" - } - res <- list(Contrasts = cdf, Zero = zdf) - invisible(res) + res <- list(Contrasts = cdf, Zero = zdf) + invisible(res) } #' Wald tests for linear hypotheses diff --git a/man/BiometryTools.Rd b/man/BiometryTools-package.Rd similarity index 79% rename from man/BiometryTools.Rd rename to man/BiometryTools-package.Rd index ac440a4..b451582 100644 --- a/man/BiometryTools.Rd +++ b/man/BiometryTools-package.Rd @@ -1,18 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/BiometryTools.R \docType{package} -\name{BiometryTools} -\alias{BiometryTools-package} +\name{BiometryTools-package} \alias{BiometryTools} -\title{BiometryTools: A package containing some useful functions for linear mixed model analysis} +\alias{BiometryTools-package} +\title{BiometryTools: A package containing useful functions for linear mixed model analysis} \description{ The BiometryTools package provides ... } -\section{Foo functions}{ - -The foo functions ... -} - \seealso{ Useful links: \itemize{ @@ -35,3 +30,4 @@ Other contributors: } } +\keyword{internal} diff --git a/man/associate.Rd b/man/associate.Rd index 96ed6a2..c920cc5 100644 --- a/man/associate.Rd +++ b/man/associate.Rd @@ -2,14 +2,9 @@ % Please edit documentation in R/associate.R \name{associate} \alias{associate} -\title{Associate BLUEs and BLUPs from an ASReml Model} +\title{Associate BLUEs with BLUPs form predict calls of the same model} \usage{ -associate( - model, - ran.term = "Treatment:Cultivar", - fix.term = "Treatment:Type", - ... -) +associate(model, ran.term = "Site:Genotype", fix.term = "Site:Type", ...) } \arguments{ \item{model}{An \code{asreml} model object. diff --git a/man/compare.Rd b/man/compare.Rd index 04c429d..95f1558 100644 --- a/man/compare.Rd +++ b/man/compare.Rd @@ -4,7 +4,16 @@ \alias{compare} \title{Pairwise comparison of BLUEs using LSD or p-values} \usage{ -compare(model, term = "Line", type = "PVAL", average.LSD = FALSE) +compare( + model, + term = "Treatment:Genotype", + by = NULL, + omit.string = NULL, + type = "HSD", + pev = TRUE, + fw.method = "none", + ... +) } \arguments{ \item{model}{An \code{asreml} fitted model object.} diff --git a/man/conv.Rd b/man/conv.Rd index 1c5392a..b531a4e 100644 --- a/man/conv.Rd +++ b/man/conv.Rd @@ -2,30 +2,108 @@ % Please edit documentation in R/conv3.R \name{conv} \alias{conv} -\title{Conversion function for Efficiency and Responsiveness BLUPs in Treatment x Site x Variety experiments} +\title{Convert Treatment × Site BLUPs into Efficiency and Responsiveness} \usage{ conv(model, Env = "TSite:Variety", levs = NULL, sep = "-", ...) } \arguments{ -\item{model}{Final full Treatment x Site x Variety model} +\item{model}{An \code{asreml} fitted model object.} -\item{Env}{Treatment x Site x Variety term} +\item{Env}{Character string of the form \code{":"}, +e.g. \code{"TSite:Variety"}, used in \code{predict(..., classify = Env)}.} -\item{levs}{Named treatment levels used in transformation. i.e c("Treat1", "Treat2") would regress Treat2 on Treat1} +\item{levs}{Character vector of length 2 giving the two treatment levels. +\code{levs[1]} is treated as the baseline (efficiency).} -\item{sep}{separator used for Treat x Site names. Defaults to \code{-}} +\item{sep}{Separator used in Treatment × Site factor level names.} -\item{...}{Other arguments passed to \code{predict}} +\item{...}{Additional arguments passed to \code{predict()}.} } \value{ -Returns a list with BLUPs, and some other stuff JULES COMPLETE +A list containing: +\itemize{ +\item \code{blups}: data frame with columns \code{Site}, \code{Variety}, +\code{levs[1]}, \code{levs[2]}, and \code{resp} +\item \code{TGmat}: transformed covariance matrix +\item \code{Gmat}: original covariance matrix +\item \code{beta}: per-site regression coefficient +\item \code{sigr}: per-site residual variance +\item \code{tmat}: transformation matrix +} } \description{ -Conversion function for Efficiency and Responsiveness BLUPs in Treatment x Site x Variety experiments +Given an \code{asreml} model with a Treatment × Site × Variety structure, +this function extracts BLUPs for a Treatment × Site interaction (classified +by Variety) and re-parameterizes them into: +\itemize{ +\item \strong{Efficiency}: the BLUP under a baseline treatment (\code{levs[1]}) +\item \strong{Responsiveness}: the residual BLUP under a second treatment +(\code{levs[2]}) after regressing \code{levs[2]} on \code{levs[1]} +using the random-effect covariance matrix +} +} +\details{ +For each site \eqn{s}, the regression coefficient is: + +\deqn{ + \beta_s = \mathrm{Cov}(T_1, T_2) / \mathrm{Var}(T_1) +} + +and responsiveness BLUPs are computed as: + +\deqn{ + b_{\mathrm{resp}} = b_{T_2} - \beta_s b_{T_1}. +} + +The function also returns the transformed covariance matrix +\eqn{G_{\mathrm{trans}} = T G T^\top} corresponding to the +efficiency/responsiveness parameterization. + +\strong{Important:} +If the fitted model assumes independent Treatment × Site effects +(e.g. \code{random = ~ TSite:Variety}), then the covariance between +treatments is zero and \eqn{\beta_s = 0}. In that case, +responsiveness reduces to the BLUP under \code{levs[2]}. } \examples{ \dontrun{ -JULES COMPLETE +library(asreml) +library(agridat) + +data(besag.met) +dat <- besag.met + +dat$county <- factor(dat$county) +dat$gen <- factor(dat$gen) +dat$rep <- factor(dat$rep) +dat$block <- factor(dat$block) + +# Create artificial 2-level treatment within each site +dat$Treat <- ave(seq_len(nrow(dat)), dat$county, FUN = function(i) { + rep(c("Treat1","Treat2"), length.out = length(i)) +}) +dat$Treat <- factor(dat$Treat) + +dat$TSite <- interaction(dat$Treat, dat$county, sep = "-") +dat$Variety <- dat$gen + +m <- asreml( + fixed = yield ~ Treat, + random = ~ rep + block + Variety + TSite:Variety, + data = dat +) + +out <- conv( + model = m, + Env = "TSite:Variety", + levs = c("Treat1","Treat2") +) + +head(out$blups) + +# In this model, treatments are independent, +# so beta will be zero: +out$beta } } diff --git a/man/extract.Rd b/man/extract.Rd index e98b5a6..aa773b9 100644 --- a/man/extract.Rd +++ b/man/extract.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/extract.R \name{extract} \alias{extract} -\title{Title} +\title{Extract a rectangular grid region within groups and optionally pad missing cells} \usage{ extract( data, @@ -15,13 +15,65 @@ extract( ) } \arguments{ -\item{fill}{} +\item{data}{A data frame containing the grouping column specified by \code{split}, +the two coordinate columns specified by \code{pattern}, and a column named +\code{Type} used for matching.} + +\item{pattern}{A length-1 character string of the form \code{":"} giving +the names of the two coordinate columns (default \code{"Row:Column"}).} + +\item{match}{A character vector of \code{Type} values used to define the +bounding rectangle within each group (default \code{"DH"}).} + +\item{split}{A length-1 character string giving the column name used to split +\code{data} into groups (default \code{"Block"}).} + +\item{pad}{Logical; if \code{TRUE}, pad missing coordinate combinations inside the +rectangle by adding new rows (default \code{TRUE}).} + +\item{keep}{Integer index (or indices) of columns to copy from existing rows into +padded rows. Values are copied from the first \code{n_missing} rows of the extracted +rectangle. Default is \code{4}.} + +\item{fill}{Optional integer index (or indices) of columns to fill with the +string \code{"Blank"} in padded rows. If \code{NULL} (default), nothing is filled.} +} +\value{ +A data frame containing the extracted (and optionally padded) rows from all groups, +sorted by the first and second coordinate columns. If \code{pad = TRUE}, an additional +column \code{add} is included to indicate whether a row is original (\code{"old"}) or +padded (\code{"new"}). } \description{ -Title +Given a data frame containing two grid coordinate columns (e.g. Row and Column), +this function splits the data into groups (e.g. by Block), identifies the +bounding rectangle of rows whose \code{Type} matches \code{match}, then returns +all rows within that rectangle for each group. Optionally, missing Row-by-Column +combinations inside the rectangle are padded by adding new rows. +} +\details{ +For each group defined by \code{split}, rows with \code{Type} in \code{match} are used +to determine the minimum and maximum values of the two coordinate columns specified +by \code{pattern}. All rows in the group whose coordinates fall within these inclusive +bounds are returned. + +When \code{pad = TRUE}, the function constructs a contingency table of the extracted +coordinates and adds rows for any missing Row-by-Column combinations. Existing rows are +marked with \code{add = "old"} and padded rows with \code{add = "new"}. } \examples{ \dontrun{ -JULES COMPLETE +df <- data.frame( + Block = rep(1:2, each = 6), + Type = rep(c("DH", "X"), times = 6), + Row = rep(c("1","1","2"), times = 4), + Column= rep(c("1","2"), times = 6), + Value = seq_len(12) +) + +# Extract rectangle defined by Type == "DH" within each Block and pad missing cells +out <- extract(df, pattern = "Row:Column", match = "DH", split = "Block", + pad = TRUE, keep = 5, fill = 4) +head(out) } } diff --git a/man/fast.Rd b/man/fast.Rd index 1a076ff..97fefc4 100644 --- a/man/fast.Rd +++ b/man/fast.Rd @@ -2,18 +2,107 @@ % Please edit documentation in R/fast.R \name{fast} \alias{fast} -\title{FAST: overall performance and stability for interpreting Factor Analytic models} +\title{FAST: Overall performance and stability from a factor-analytic MET model} \usage{ -fast(model, dat = NULL, term = "fa(Site, 4):Genotype", ...) +fast(model, term = "fa(Site, 4):Genotype", ...) } \arguments{ -\item{...}{} +\item{model}{An \code{asreml} model object containing the FA term specified by \code{term}.} + +\item{term}{Character string giving the FA term of interest, typically of the form +\code{"fa(, ):"} (default \code{"fa(Site, 4):Genotype"}).} + +\item{...}{Additional arguments passed to \code{ASExtras4::fa.asreml()}.} + +\item{dat}{A data frame used only to obtain factor levels for the environment and genotype terms +named in \code{term}. Must contain those columns as factors (e.g. \code{Site} and \code{Genotype}).} +} +\value{ +A data frame with one row per Environment \eqn{\times} Genotype combination containing: +\itemize{ +\item the environment factor (e.g. \code{Site}), +\item \code{loads1}, \code{loads2}, ... and \code{spec.var} (from \code{ASExtras4::fa.asreml()}), +\item the genotype factor (e.g. \code{Genotype}), +\item \code{score1}, \code{score2}, ... (scores), +\item \code{fitted1}, \code{fitted2}, ... (per-factor fitted contributions), +\item \code{CVE} (sum of fitted contributions across factors), +\item \code{VE} (\code{CVE + spec.var}; included for convenience), +\item \code{OP} (overall performance; constant within genotype), +\item \code{dev} (deviation from factor-1 fitted contribution), +\item \code{stab} (mean squared deviation by genotype; \code{sqrt(stab)} gives RMSD scale). +} } \description{ -FAST: overall performance and stability for interpreting Factor Analytic models +Compute FAST-style summaries (overall performance and stability) from an +ASReml factor analytic (FA) mixed model fitted to variety-by-environment +(VE) effects. The method is based on the latent regression interpretation of +the FA model described by Smith & Cullis (2018). +} +\details{ +Consider the common VE (CVE) effects \eqn{\tilde\beta_{ij}} for genotype +\eqn{i} in environment \eqn{j} under an FA model with rotated loadings +\eqn{\hat\lambda_{rj}} and genotype scores \eqn{\tilde f_{ri}}. Smith & Cullis (2018) +separate the first factor from the remainder via the \emph{first latent regression} +representation, where \eqn{\tilde\epsilon_{ij}} collects the contributions from factors +\eqn{r = 2,\dots,k} (and can be interpreted as deviations about the first +latent regression line). + +When (almost) all rotated loadings for factor 1 are positive, Smith & Cullis (2018) +define \strong{overall performance} (OP) for genotype \eqn{i} as the fitted value at the +mean of the factor-1 loadings: +\deqn{OP_i = \bar\lambda_1 \tilde f_{1i},} +where \eqn{\bar\lambda_1} is the mean of \eqn{\hat\lambda_{1j}} across environments. + +They define \strong{stability} as the root mean squared deviation (RMSD) about the first +latent regression line: +\deqn{RMSD_i = \sqrt{\frac{1}{p}\sum_{j=1}^p (\tilde\beta_{ij} - \hat\lambda_{1j}\tilde f_{1i})^2},} +where \eqn{p} is the number of environments. + +This function reconstructs per-environment fitted contributions +\eqn{\widehat{\mathrm{fitted}}_{rij} = \hat\lambda_{rj}\tilde f_{ri}} and forms +\code{CVE} as their sum across factors. It then computes: +\itemize{ +\item \code{OP = mean(loads1) * score1} (matches \eqn{OP_i} above), +\item \code{dev = CVE - fitted1} (corresponds to \eqn{\tilde\beta_{ij}-\hat\lambda_{1j}\tilde f_{1i}}), +\item \code{stab = mean(dev^2)} by genotype (this is \emph{MSD}; RMSD is \code{sqrt(stab)}). +} } \examples{ \dontrun{ -JULES COMPLETE + +library(asreml) +data(oats) + +oats$Nitrogen <- as.factor(oats$Nitrogen) +oats$Variety <- as.factor(oats$Variety) +oats$Block <- as.factor(oats$Block) + +m_fa <- asreml( + fixed = yield ~ Nitrogen, # Fixed treatment levels + random = ~ Block + Block:Variety + fa(Nitrogen, 2):Variety, + residual = ~ units, + data = oats +) + +out <- fast(m_fa, dat = oats, term = "fa(Nitrogen, 2):Variety") + +op_by_var <- tapply(out$OP, out$Variety, unique) +msd_by_var <- tapply(out$stab, out$Variety, unique) +rmsd_by_var <- sqrt(msd_by_var) + +summary_tbl <- data.frame( + Variety = names(op_by_var), + OP = as.numeric(op_by_var), + MSD = as.numeric(msd_by_var), + RMSD = as.numeric(rmsd_by_var), + row.names = NULL +) + +summary_tbl[order(-summary_tbl$OP), ] +} + } +\references{ +Smith, A. B. & Cullis, B. R. (2018). Plant breeding selection tools built on factor analytic +mixed models for multi-environment trial data. \emph{Euphytica}, 214:143. } diff --git a/man/fastIC.Rd b/man/fastIC.Rd new file mode 100644 index 0000000..281311d --- /dev/null +++ b/man/fastIC.Rd @@ -0,0 +1,143 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fast.R +\name{fastIC} +\alias{fastIC} +\title{FAST-IC: FAST summaries within interaction classes from a factor-analytic MET model} +\usage{ +fastIC(model, term = "fa(Site, 4):Genotype", ic.num = 2, ...) +} +\arguments{ +\item{model}{An \code{asreml} model object containing the FA term specified by \code{term}.} + +\item{term}{Character string specifying the FA term of interest, +typically of the form \code{"fa(, ):"}. +Default is \code{"fa(Site, 4):Genotype"}.} + +\item{ic.num}{Integer specifying the number of FA factors used to define +interaction classes (default = 2).} + +\item{...}{Additional arguments passed to \code{ASExtras4::fa.asreml()}.} +} +\value{ +A data frame containing one row for each Environment × Genotype +combination with columns including: + +\itemize{ +\item environment factor (e.g. \code{Site}) +\item \code{iclass} — interaction class label +\item FA loadings (\code{loads1}, \code{loads2}, ...) +\item \code{spec.var} — specific variance +\item genotype scores (\code{score1}, \code{score2}, ...) +\item fitted FA contributions (\code{fitted1}, \code{fitted2}, ...) +\item \code{CVE} — common variety-by-environment effect +\item \code{OP} — overall performance within interaction class +\item \code{dev} — deviation from fitted FA contribution +\item \code{RMSD} — stability within interaction class +} +} +\description{ +Compute FAST-style summaries of genotype performance and stability within +\emph{interaction classes} (ICs) derived from a factor-analytic (FA) mixed model +fitted to multi-environment trial (MET) data using ASReml. +} +\details{ +Factor analytic (FA) linear mixed models are widely used to model +genotype-by-environment (G×E) interactions in multi-environment trials. +In this framework, the common variety-by-environment effects can be written +in terms of environment loadings and genotype scores. + +Smith & Cullis (2018) introduced the FAST approach, which interprets the FA +model using a \emph{latent regression representation}. In this interpretation, +the first factor captures the dominant pattern of genotype response across +environments, allowing summary measures of: + +\itemize{ +\item \strong{Overall performance (OP)} — the expected genotype response at +the mean of the first factor loadings. + +\item \strong{Stability} — typically measured using the root mean squared +deviation (RMSD) of genotype responses from the first latent regression line. +} + +Smith et al. (2021) extended this idea by introducing +\emph{interaction classes (iClasses)}. These classes group environments according +to the \strong{sign pattern of their FA loadings}, reflecting different +patterns of genotype response across environments. + +For a model with \eqn{k} factors, interaction classes are defined using the +sign pattern of the first \code{ic.num} loadings: + +\itemize{ +\item \code{"p"} indicates a positive loading +\item \code{"n"} indicates a negative loading +} + +For example, when \code{ic.num = 2}: + +\itemize{ +\item \code{"pp"} means both loadings are positive +\item \code{"pn"} means loading 1 positive, loading 2 negative +\item \code{"np"} means loading 1 negative, loading 2 positive +\item \code{"nn"} means both loadings are negative +} + +FAST summaries can then be computed \strong{within each interaction class}. +This produces class-specific summaries of genotype performance and stability. + +In this function: + +\itemize{ +\item Genotype scores \eqn{\tilde f_{ri}} are obtained from the FA model. +\item Environment loadings \eqn{\hat\lambda_{rj}} define interaction classes. +\item The fitted FA contributions +\eqn{\widehat{fitted}_{rij} = \hat\lambda_{rj}\tilde f_{ri}} are reconstructed. +\item The common variety-by-environment effect (CVE) is obtained as the +sum of fitted contributions across factors. +} + +Within each interaction class \eqn{c}, overall performance is calculated as: + +\deqn{ +OP_{i,c} = +\sum_{r=1}^{ic.num} +\bar{\lambda}_{r,c} \tilde f_{ri} +} + +where \eqn{\bar{\lambda}_{r,c}} is the mean loading of environments in class +\eqn{c}. + +Stability is summarised using the root mean squared deviation (RMSD): + +\deqn{ + RMSD_{i,c} = + \sqrt{ + \frac{1}{|c|} + \sum_{j \in c} + \left( + \tilde\beta_{ij} - + \sum_{r=1}^{ic.num}\hat\lambda_{rj}\tilde f_{ri} + \right)^2 + } +} + +where \eqn{\tilde\beta_{ij}} denotes the FA-predicted common VE effect. + +These summaries allow genotype performance to be compared within groups +of environments that share similar G×E response patterns. +} +\examples{ +\dontrun{ +} +} +\references{ +Smith, A., Norman, A., Kuchel, H., & Cullis, B. (2021). +Plant variety selection using interaction classes derived from factor analytic +linear mixed models: Models with independent variety effects. +\emph{Frontiers in Plant Science}, 12, 737462. +https://doi.org/10.3389/fpls.2021.737462 + +Smith, A. B., & Cullis, B. R. (2018). +Plant breeding selection tools built on factor analytic mixed models for +multi-environment trial data. +\emph{Euphytica}, 214, 143. +} diff --git a/man/fineMap.Rd b/man/fineMap.Rd index 17fca03..a9faea6 100644 --- a/man/fineMap.Rd +++ b/man/fineMap.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/fineMap.R \name{fineMap} \alias{fineMap} -\title{Fine map a \code{wgaim} object} +\title{Fine-map a QTL region around a nominated marker in a fitted \code{wgaim} model} \usage{ fineMap( model, @@ -14,10 +14,61 @@ fineMap( ) } \arguments{ -\item{...}{} +\item{model}{A fitted \code{wgaim} object (typically fit with ASReml-R via \code{asreml}). +The object must contain \code{model$QTL} components used by \code{wgaim}.} + +\item{intervalObj}{A \code{qtl::cross} object containing genotype data used for +interval mapping or genotype imputation. Must inherit class \code{"cross"}.} + +\item{mark}{Character scalar. The name of the focal marker around which to fine-map. +This must be a marker present in \code{intervalObj}.} + +\item{flanking}{Integer. Number of markers to include on each side of \code{mark} +when defining the scan window. Default is \code{50}.} + +\item{exclusion.window}{Numeric. Distance threshold for excluding nearby markers +when constructing the background marker set for each candidate marker. Markers +with \code{abs(dist_i - dist_j) < exclusion.window} are excluded. Default is \code{10000}.} + +\item{...}{Additional arguments passed to \code{update.asreml()} when refitting +the model for each candidate marker.} +} +\value{ +A data frame with one row per scanned marker in the flanking window and columns: +\describe{ +\item{mark}{Marker name (as in \code{qtl::pull.map(intervalObj, chr)}).} +\item{dist}{Map position of the marker (units as returned by \code{qtl::pull.map()}).} +\item{pvalue}{Per-marker p-value derived from a Wald-style statistic.} +\item{LOD}{Per-marker LOD score derived from the same statistic.} +} } \description{ -Fine map a \code{wgaim} object +Given a fitted \code{wgaim} object and a \code{qtl::cross} object containing +interval- or imputation-based genotype data, \code{fineMap()} scans a window +of markers around a nominated marker and refits the model for each candidate +marker. For each refit, the function extracts the candidate marker effect +(either from the fixed or random component, depending on the model method), +computes a Wald-style test statistic, and reports a per-marker p-value and +Logarithm of the Odds(LOD). +} +\details{ +The function: +\enumerate{ +\item Extracts genotype predictors from \code{intervalObj} (either +\code{interval.data} or \code{imputed.data}, depending on \code{model$QTL$type}). +\item Aligns genotype rows to the phenotype rows used by \code{model}, using +the ID column \code{model$QTL$diag$genetic.term}. +\item Defines a window of \code{flanking} markers on each side of \code{mark}. +\item For each candidate marker in the window, removes nearby markers within +\code{exclusion.window} (in the same units as the map positions returned by +\code{qtl::pull.map()}) from the background set used to build the covariance. +\item Constructs a covariance object via \code{wgaim:::constructCM()} and +assigns \code{covObj} into the parent frame (a side-effect required by +downstream \code{wgaim}/\code{asreml} update code). +\item Refits the model with the candidate marker added and the original QTL +marker removed, then extracts an effect estimate and its variance to compute +a test statistic, p-value, and LOD. +} } \examples{ \dontrun{ diff --git a/man/manhattan.Rd b/man/manhattan.Rd index e445016..222c556 100644 --- a/man/manhattan.Rd +++ b/man/manhattan.Rd @@ -2,15 +2,50 @@ % Please edit documentation in R/manhattan.R \name{manhattan} \alias{manhattan} -\title{Manhattan plot using ggplot} +\title{Manhattan plot for QTL diagnostics} \usage{ manhattan(mlist, cross, chr.in = NULL, annotate = TRUE, ...) } \arguments{ -\item{...}{} +\item{mlist}{Named list of QTL analysis results. Each element must contain: +\itemize{ +\item \code{QTL$diag$oint[[1]]}: a named numeric vector of statistics per marker +(names like \code{"Chr.1.abc"} are allowed), +\item \code{QTL$qtl}: a character vector of selected QTL marker names (used for +annotation when \code{annotate = TRUE}). +}} + +\item{cross}{A \code{qtl::cross} object containing marker maps in \code{cross$geno}. +Used to compute cumulative genome positions and chromosome labels.} + +\item{chr.in}{Optional character vector of chromosome IDs to include +(e.g. \code{c("1","2","X")}). Default \code{NULL} (use all chromosomes).} + +\item{annotate}{Logical. If \code{TRUE} (default), label selected QTL markers +(\code{x$QTL$qtl}) on the plot.} + +\item{...}{Currently unused.} +} +\value{ +A \code{ggplot2} object (a faceted Manhattan plot). If \code{annotate = TRUE}, +the returned plot includes text labels for selected markers. } \description{ -Manhattan plot using ggplot +Creates a Manhattan-style plot (via \code{ggplot2}) of the outlier/diagnostic +statistic across genome position for one or more analyses. +} +\details{ +The function expects \code{mlist} to be a named list of results where each element +contains QTL diagnostic output in \code{x$QTL$diag$oint[[1]]}. Marker positions are +taken from the supplied \code{cross} object (typically an \code{rqtl} cross). + +The statistic is plotted against a cumulative genome position computed from +the marker maps in \code{cross$geno}. Chromosomes are shown using alternating point +colours (two groups). The plot is faceted by the names of \code{mlist} (one panel +per analysis). + +If \code{chr.in} is supplied, both the cross object and the plotted markers are +filtered to those chromosomes. } \examples{ \dontrun{ diff --git a/man/outlier.down.Rd b/man/outlier.down.Rd deleted file mode 100644 index 09679a5..0000000 --- a/man/outlier.down.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/outliers.R -\name{outlier.down} -\alias{outlier.down} -\title{Downweight outliers} -\usage{ -outlier.down(data, model, cutoff = 3) -} -\arguments{ -\item{cutoff}{} -} -\description{ -Downweight outliers -} -\examples{ -\dontrun{ -JULES COMPLETE -} -} diff --git a/man/outlier.rem.Rd b/man/outlier.rem.Rd deleted file mode 100644 index 2b942cc..0000000 --- a/man/outlier.rem.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/outliers.R -\name{outlier.rem} -\alias{outlier.rem} -\title{Outlier removal function} -\usage{ -outlier.rem(data, model, cutoff = 3) -} -\arguments{ -\item{cutoff}{} -} -\description{ -Outlier removal function -} -\examples{ -\dontrun{ -JULES COMPLETE -} -} diff --git a/man/outliers.Rd b/man/outliers.Rd new file mode 100644 index 0000000..df57dc8 --- /dev/null +++ b/man/outliers.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/outliers.R +\name{outliers} +\alias{outliers} +\alias{outlier.down} +\alias{outlier.rem} +\title{Handle outliers using standardized residuals} +\usage{ +outlier.down(data, model, cutoff = 3) + +outlier.rem(data, model, cutoff = 3) +} +\arguments{ +\item{data}{A data frame containing the response columns referenced by \code{model}.} + +\item{model}{A named list of fitted model objects, one per trait/response. +Each element must contain standardized residuals at \code{x$aom$R[, 2]}. +Names of \code{model} are used to match response columns in \code{data}.} + +\item{cutoff}{Numeric. Absolute standardized residual threshold used to flag +outliers (default \code{3}).} +} +\value{ +\itemize{ +\item \code{outlier.down()} returns the input \code{data} with additional 0/1 indicator +columns appended. New columns are named \code{".o."}. +\item \code{outlier.rem()} returns a list with: +\describe{ +\item{data}{\code{data} with outlying responses set to \code{NA} (per trait).} +\item{out}{Named logical vector indicating whether each trait had any +outliers removed.} +} +} +} +\description{ +Utilities to flag outlying observations per trait using standardized residuals +from fitted models (typically \code{asreml}), with a common cutoff rule. + +Adds one indicator column per detected outlying observation (per trait). + +Sets outlying response values to \code{NA} (per trait) and reports which traits had +outliers removed. +} +\details{ +\code{outlier.down()} adds indicator (0/1) columns marking outlying rows, which can +be used as covariates or to downweight those observations in a subsequent fit. +\code{outlier.rem()} replaces outlying responses with \code{NA} (per trait) and returns +which traits had any outliers removed. + +Outliers are identified for each trait using +\code{abs(model[[trait]]$aom$R[, 2]) > cutoff}. + +\code{outlier.down()} will append new indicator columns; if columns with the same +prefix already exist (e.g. \code{"Trait.o.*"}), numbering continues from the +highest existing suffix. + +Both functions print the row indices of detected outliers. +} +\examples{ +\dontrun{ + +} +\dontrun{ + +} +} diff --git a/man/pad.data.Rd b/man/pad.data.Rd index 0478a92..8699c45 100644 --- a/man/pad.data.Rd +++ b/man/pad.data.Rd @@ -2,18 +2,63 @@ % Please edit documentation in R/pad.data.R \name{pad.data} \alias{pad.data} -\title{Title} +\title{Pad missing row–column positions within groups} \usage{ pad.data(data, pattern = "Row:Column", split = "Block", keep = 4, fill = NULL) } \arguments{ -\item{fill}{} +\item{data}{A data frame.} + +\item{pattern}{Character string of the form \code{"Row:Column"} giving the names of +the two variables that define the row–column layout.} + +\item{split}{Character. Name of the column used to split \code{data} into groups +(default \code{"Block"}). Padding is done within each group.} + +\item{keep}{Integer. Column index to copy from existing data into newly added +rows (default \code{4}). The value copied comes from the first rows of the group +(recycled as needed). This is typically used to keep a constant identifier +(e.g. block label or trial id).} + +\item{fill}{Optional. Integer or character specifying columns to set to \code{NA} +in newly created rows. Default \code{NULL} (do nothing).} +} +\value{ +A data frame like \code{data}, with additional rows added for missing +row–column combinations. A column \code{add} is added, with values \code{"old"} for +original rows and \code{"new"} for padded rows. The result is ordered by the +row and column variables in \code{pattern}. } \description{ -Title +Ensures that, within each level of a grouping variable (e.g. a block), +every combination of a row factor and a column factor exists. +Missing row–column positions are added as new rows with \code{NA}s. +} +\details{ +For each group defined by \code{split}, the function forms a contingency table +of \code{Row} by \code{Column}. For any zero-count cell, it adds a new row where: +\itemize{ +\item \code{Row} and \code{Column} are set to the missing combination, +\item all other columns are \code{NA} (except \code{keep}, which is copied), +\item \code{add} is set to \code{"new"}. +} +The row/column factors are then re-leveled to be in increasing numeric order +(assuming their levels are numeric strings). } \examples{ \dontrun{ -JULES COMPLETE +d <- expand.grid( + Block = factor(1:2), + Row = factor(1:3), + Column = factor(1:3) +) + +# Remove one cell to simulate a missing plot +d <- d[-5, ] +d # Inspect + +pad.data(d, pattern = "Row:Column", split = "Block", keep = 1) + } + } diff --git a/man/randomRegress.Rd b/man/randomRegress.Rd index 4f86960..90d7a76 100644 --- a/man/randomRegress.Rd +++ b/man/randomRegress.Rd @@ -4,7 +4,14 @@ \alias{randomRegress} \title{Conversion function for Efficiency and Responsiveness BLUPs in Treatment x Site x Variety experiments} \usage{ -randomRegress(model, Env = "TSite:Variety", levs = NULL, sep = "-", ...) +randomRegress( + model, + Env = "TSite:Variety", + levs = NULL, + sep = "-", + pev = TRUE, + ... +) } \arguments{ \item{model}{An \code{asreml} object. The final full Treatment x Site x Variety model} diff --git a/man/themes.Rd b/man/themes.Rd index 075079b..c62ada3 100644 --- a/man/themes.Rd +++ b/man/themes.Rd @@ -6,7 +6,7 @@ \alias{theme_design_heat} \alias{theme_scatter} \alias{theme_barplot} -\title{Custom \code{ggplot2} themes} +\title{Custom ggplot2 themes} \usage{ theme_design(base_size = 11, base_family = "") @@ -17,24 +17,59 @@ theme_scatter(base_size = 11, base_family = "") theme_barplot(base_size = 11, base_family = "") } \arguments{ -\item{base_size}{Plot font size, given in pts.} +\item{base_size}{Base font size (in points).} -\item{base_family}{Font family used for the plot.} +\item{base_family}{Base font family.} +} +\value{ +A \code{ggplot2} theme object. } \description{ -Themes that provide some custom styling for \code{ggplot2} plots. +Minimal theme helpers used across the package. + +A clean theme for general plots with the legend removed. + +A design theme variant intended for heatmaps (legend kept; x labels rotated). + +Theme tuned for scatter plots (light grid and a panel border). + +Theme tuned for bar plots (legend at bottom; x labels rotated). +} +\details{ +These functions return \code{ggplot2::theme()} objects that can be added to a plot +with \code{+}. +\itemize{ +\item \code{theme_design()} removes most grid/axes clutter and hides the legend. +\item \code{theme_design_heat()} is intended for heatmaps (keeps legend, rotates x labels). +\item \code{theme_scatter()} uses a light grid and a panel border; larger text defaults. +\item \code{theme_barplot()} places the legend at the bottom and rotates x labels. +} } \examples{ \dontrun{ -SAM COMPLETE +library(ggplot2) +ggplot(mtcars, aes(wt, mpg)) + + geom_point() + + theme_design() } \dontrun{ -SAM COMPLETE +library(ggplot2) +df <- expand.grid(x = letters[1:5], y = letters[1:5]) +df$z <- seq_len(nrow(df)) +ggplot(df, aes(x, y, fill = z)) + + geom_tile() + + theme_design_heat() } \dontrun{ -SAM COMPLETE +library(ggplot2) +ggplot(mtcars, aes(wt, mpg)) + + geom_point() + + theme_scatter() } \dontrun{ -SAM COMPLETE +library(ggplot2) +ggplot(mtcars, aes(factor(cyl), fill = factor(gear))) + + geom_bar() + + theme_barplot() } } From eb600ad22d3cde22c45f1ef83f1d3f07986e6938 Mon Sep 17 00:00:00 2001 From: Jiazhe Lin Date: Mon, 9 Mar 2026 23:00:58 +1100 Subject: [PATCH 5/8] Added `prune()` and documentations for it. --- NAMESPACE | 1 + R/prune.R | 115 +++++++++++++++++++++++++++++++++++++++++++++++++ man/fineMap.Rd | 7 +++ man/prune.Rd | 54 +++++++++++++++++++++++ 4 files changed, 177 insertions(+) create mode 100644 R/prune.R create mode 100644 man/prune.Rd diff --git a/NAMESPACE b/NAMESPACE index 13988d6..e06386f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(outlier.rem) export(pad.data) export(phenClones) export(phenfixClones) +export(prune) export(randomRegress) export(reinstall_packages) export(sunrise_time) diff --git a/R/prune.R b/R/prune.R new file mode 100644 index 0000000..0209ef1 --- /dev/null +++ b/R/prune.R @@ -0,0 +1,115 @@ +#' Prune a pedigree to individuals present in a dataset +#' +#' Removes individuals from a pedigree that are not represented in a data set, +#' keeping only the subset of the pedigree required for the specified individuals +#' and their ancestors up to a given number of generations. +#' +#' This function is based on the `prune()` implementation from the +#' *pedicure* package developed at the University of Wollongong +#' (David Butler, 2016). +#' +#' @param ped A pedigree data frame. The first column contains individual IDs, +#' the second column the dam (mother), and the third column the sire (father). +#' @param data A data frame containing individuals to retain. One column must +#' correspond to the individual IDs in `ped`. +#' @param gen Optional integer specifying the number of generations of ancestors +#' to retain. If `NULL`, the maximum number of generations present in the +#' pedigree is used. +#' +#' @return A pruned pedigree data frame containing only the individuals required +#' for the specified data individuals and their ancestors. +#' +#' @details +#' The function identifies individuals present in `data`, then constructs a +#' pedigree structure and calls `pedigree::trimPed()` to retain the required +#' individuals and their ancestors. Individuals in `data` that are absent from +#' `ped` will trigger a warning. +#' +#' +#' @export +#' +#' @examples +#' \dontrun{ +ped <- data.frame( + id = c("A","B","C","D"), + dam = c(NA, NA, "A", "A"), + sire = c(NA, NA, "B", "B") +) + +data <- data.frame(id = c("C","D")) + +prune(ped, data) +#' } +prune <- function(ped, data, gen = NULL) +{ + if(is.na(which <- match(names(ped)[1],names(data)))) + stop(paste("Cannot find",names(ped)[1],"in data")) + if(any(is.na(match(as.character(data[[which]]), as.character(ped[,1]))))) + warning("There are individuals in 'data' that are absent in 'ped'") + + data <- as.numeric(!is.na(match(as.character(ped[,1]), + as.character(data[[which]])))) + mmd <- data.frame(id = 1:nrow(ped), + dam = match(ped[,2],ped[,1],nomatch = 0), + sire = match(ped[,3],ped[,1],nomatch = 0)) + print(mmd) + if(is.null(gen)) { + gen <- max(countGen(mmd)) + } + what <- pedigree::trimPed(mmd, data, gen) + return(ped[what,]) +} + +#' Count generations in a pedigree +#' +#' Computes the generation number for each individual in a pedigree structure. +#' Founders (individuals with no recorded parents) are assigned generation 1, +#' and descendants are assigned one plus the maximum generation of their +#' parents. +#' +#' @param ped A data frame or matrix with columns: +#' \describe{ +#' \item{id}{Individual index} +#' \item{dam}{Index of the dam (mother), or 0 if unknown} +#' \item{sire}{Index of the sire (father), or 0 if unknown} +#' } +#' +#' @return A numeric vector giving the generation number for each individual. +#' +#' @details +#' The function iteratively propagates generation numbers through the pedigree +#' until all individuals have been assigned a generation level. +#' +#' @keywords internal +countGen <- function(ped) +{ + n <- nrow(ped) + gen <- rep(NA_integer_, n) + + # founders + founders <- ped$dam == 0 & ped$sire == 0 + gen[founders] <- 1 + + repeat { + old <- gen + + for (i in seq_len(n)) { + if (is.na(gen[i])) { + + dam <- ped$dam[i] + sire <- ped$sire[i] + + dam.gen <- if (dam > 0) gen[dam] else 1 + sire.gen <- if (sire > 0) gen[sire] else 1 + + if (!is.na(dam.gen) && !is.na(sire.gen)) { + gen[i] <- max(dam.gen, sire.gen) + 1 + } + } + } + + if (identical(old, gen)) break + } + + gen +} diff --git a/man/fineMap.Rd b/man/fineMap.Rd index a9faea6..da0119f 100644 --- a/man/fineMap.Rd +++ b/man/fineMap.Rd @@ -70,6 +70,13 @@ marker removed, then extracts an effect estimate and its variance to compute a test statistic, p-value, and LOD. } } +\section{Note}{ + +\code{fineMap()} assigns an object named \code{covObj} into the calling +environment (\code{parent.frame()}). This is required by internal \code{wgaim} +update routines that expect \code{covObj} to exist during model refits. +} + \examples{ \dontrun{ JULES COMPLETE diff --git a/man/prune.Rd b/man/prune.Rd new file mode 100644 index 0000000..70acfe6 --- /dev/null +++ b/man/prune.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prune.R +\name{prune} +\alias{prune} +\title{Prune a pedigree to individuals present in a dataset} +\usage{ +prune(ped, data, gen = NULL) +} +\arguments{ +\item{ped}{A pedigree data frame. The first column contains individual IDs, +the second column the dam (mother), and the third column the sire (father).} + +\item{data}{A data frame containing individuals to retain. One column must +correspond to the individual IDs in \code{ped}.} + +\item{gen}{Optional integer specifying the number of generations of ancestors +to retain. If \code{NULL}, the maximum number of generations present in the +pedigree is used.} +} +\value{ +A pruned pedigree data frame containing only the individuals required +for the specified data individuals and their ancestors. +} +\description{ +Removes individuals from a pedigree that are not represented in a data set, +keeping only the subset of the pedigree required for the specified individuals +and their ancestors up to a given number of generations. +} +\details{ +This function is based on the \code{prune()} implementation from the +\emph{pedicure} package developed at the University of Wollongong +(David Butler, 2016). + +The function identifies individuals present in \code{data}, then constructs a +pedigree structure and calls \code{pedigree::trimPed()} to retain the required +individuals and their ancestors. Individuals in \code{data} that are absent from +\code{ped} will trigger a warning. +} +\examples{ +\dontrun{ +ped <- data.frame( + id = c("A","B","C","D"), + dam = c(NA, NA, "A", "A"), + sire = c(NA, NA, "B", "B") +) + +data <- data.frame(id = c("C","D")) + +prune(ped, data) +} +} +\references{ +TODO +} From ba50b7364d1ce422a60f924d994154bf546949ed Mon Sep 17 00:00:00 2001 From: Jiazhe Lin Date: Sat, 14 Mar 2026 23:14:45 +1100 Subject: [PATCH 6/8] Added docos for `herit.asreml()`, `hsd()`, `randomRegress()` and `fixedRegress()` #5. Skipped the tests fpr R version > 4.0 atm. --- R/compare.R | 143 ++++++----- R/heritability.R | 80 ++++++- R/hsd.R | 64 ++++- R/prune.R | 20 +- R/regresion_functions.R | 288 +++++++++++------------ R/sunrise_sunset_times.R | 4 +- man/compare.Rd | 2 +- man/countGen.Rd | 30 +++ man/fixedRegress.Rd | 80 ++++++- man/herit.asreml.Rd | 77 +++++- man/hsd.Rd | 66 +++++- man/prune.Rd | 3 - man/randomRegress.Rd | 77 +++++- man/sunset_time.Rd | 7 + tests/testthat/setup-install_asreml.R | 1 - tests/testthat/teardown-install_asreml.R | 1 - tests/testthat/test-install_asreml.R | 8 +- 17 files changed, 690 insertions(+), 261 deletions(-) create mode 100644 man/countGen.Rd diff --git a/R/compare.R b/R/compare.R index 196fb43..4e89c1c 100644 --- a/R/compare.R +++ b/R/compare.R @@ -37,67 +37,90 @@ #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' TODO #' } -compare <- function(model, term = "Line", type = "PVAL", average.LSD = FALSE) { - pred <- predict(model, classify = term, sed = TRUE) - nterm <- unlist(strsplit(term, ":")) - pv <- pred$pvals - sed <- pred$sed - if (any(wh <- is.na(pv$predicted.value))) { - pv <- pv[!wh, ] - sed <- sed[!wh, !wh] - } - if (length(nterm) > 1) { - labs <- paste(pv[[nterm[1]]], pv[[nterm[2]]], sep = ":") - } else { - labs <- pv[[term]] - } - if (type %in% "LSD") { - add <- sed * qt(0.025, df = model$nedf, lower.tail = FALSE) - dimnames(add)[[2]] <- paste("LSD", labs, sep = ":") - if (average.LSD) { - add <- cbind.data.frame(ave.LSD = rep(mean(add[lower.tri(add)]), dim(pv)[1])) +compare <- function(model, term = "Treatment:Genotype", by = NULL, omit.string = NULL, type = "HSD", pev = TRUE, fw.method = "none", ...){ + pred <- predict(model, classify = term, vcov = TRUE, ...) + terms <- unlist(strsplit(term, ":")) + pv <- pred$pvals + inds <- !is.na(pv$predicted.value) + if(!pev & all(terms %in% all.vars(model$call$random))){ + varm <- summary(model, vparameters = TRUE)$vparameters[[term]] + if(length(terms) > 1) + len <- table(pv[,1])[1] + else len <- nrow(pv) + vara <- kronecker(varm, diag(len)) - pred$vcov + vara[inds, inds] + } else vara <- pred$vcov[inds, inds] + pv <- pv[inds,] + section <- FALSE + if(!is.null(by)){ + bys <- unlist(strsplit(by, ":")) + if(all(terms %in% bys)) + stop("Argument \"by\" indicates no multiple comparisons are being made.") + if(!all(bys %in% terms)) + stop("Some terms in argument \"by\" are not in \"term\".") + if(length(bys) > 1) + pv[[by]] <- apply(pv[,bys], 1, function(el) paste(el, collapse = ":")) + } else{ + by <- term + pv[[by]] <- by } - } else if (type %in% "PVAL") { - ord <- 1:nrow(pv) - if (length(nterm) > 1) { - fix.form <- paste(deparse(model$call$fixed), nterm[1], nterm[2], "1", sep = " - ") - model <- update(model, fixed. = fix.form, Cfixed = TRUE) - coefs <- model$coefficients$fixed - wh <- coefs[, 1] == 0 - cnams <- rownames(coefs)[!wh] - sp <- strsplit(cnams, ":") - left <- sapply(sp, "[", 1)[1] - right <- sapply(sp, "[", 2)[1] - leftn <- 1 - rightn <- 2 - if (any(grep(nterm[1], right))) { - leftn <- 2 - } - rightn <- 1 - left <- gsub(paste(nterm[1], "_", sep = ""), "", sapply(sp, "[", leftn)) - right <- gsub(paste(nterm[2], "_", sep = ""), "", sapply(sp, "[", rightn)) - ord <- pmatch(labs, paste(left, right, sep = ":")) - } else { - fix.form <- paste(deparse(model$call$fixed), "1", sep = " - ") - model <- update(model, fixed. = fix.form, Cfixed = TRUE) + if(!is.null(omit.string)){ + oind <- grep(omit.string, as.character(pv[[gnam]])) + if(length(oind)){ + pv <- pv[-oind,] + sed <- sed[-oind,-oind] + } } - cb <- t(combn(nrow(pv), 2)) - mat <- matrix(0, nrow = nrow(cb), ncol = nrow(pv)) - mat[cbind(1:nrow(mat), cb[, 1])] <- 1 - mat[cbind(1:nrow(mat), cb[, 2])] <- -1 - lenf <- length(model$coefficients$fixed) - cc <- list(coef = (1:lenf)[!wh], type = "con", comp = mat) - wt <- wald.test(model, list(cc))$Contrasts - pval <- wt$"P-Value" - add <- matrix(0, nrow = nrow(pv), ncol = nrow(pv)) - add[lower.tri(add)] <- pval - add <- add + t(add) - add <- add[ord, ord] - dimnames(add)[[2]] <- paste("PVAL", labs, sep = ":") - } else { - stop("This type is not defined.") - } - cbind.data.frame(pv[, 1:(length(nterm) + 1)], add) + sst <- as.character(pv[[by]]) + um <- unique(sst) + if(type %in% c("HSD","LSD")){ + tsd <- avsed <- c() + for(k in 1:length(um)){ + sinds <- sst %in% um[k] + svar <- vara[sinds, sinds] + avsed[k] <- sqrt(mean(apply(combn(diag(svar), 2), 2, sum) - 2*svar[lower.tri(svar)])) + if(type == "HSD") + tsd[k] <- (avsed[k]/sqrt(2))*qtukey(0.95, length(sinds), model$nedf) + else tsd[k] <- avsed[k]*qt(0.025, df = model$nedf, lower.tail = FALSE) + } + pv <- cbind.data.frame(pv[,1:(length(terms) + 2)]) + pv[[type]] <- rep(tsd, times = table(sst)) + pv[["sed"]] <- rep(avsed, times = table(sst)) + } + else if(type %in% "PVAL"){ + pvs <- split(pv, pv[[by]]) + yvar <- deparse(model$call$fixed[[2]]) + xvar <- labels(terms(as.formula(model$call$fixed))) + fix.form <- as.formula(paste(yvar, " ~ ", xvar[length(xvar)], " - 1", sep = "")) + model <- update(model, fixed. = fix.form, Cfixed = TRUE) + coefs <- model$coefficients$fixed + cinds <- grep(paste(terms, collapse = ".*"), rownames(coefs)) + coefs <- coefs[cinds,,drop = FALSE] + for(k in 1:length(um)){ + umt <- paste(strsplit(um[k], ":")[[1]], collapse = ".*") + sind <- cinds[grep(umt, rownames(coefs))] + scf <- coefs[grep(umt, rownames(coefs)),] + sna <- scf == 0 + aind <- sind[!sna] + pvt <- pvs[[k]] + cb <- t(combn(nrow(pvt), 2)) + mat <- matrix(0, nrow = nrow(cb), ncol = nrow(pvt)) + mat[cbind(1:nrow(mat), cb[,1])] <- 1 + mat[cbind(1:nrow(mat), cb[,2])] <- -1 + cc <- list(coef = aind, type = "con", comp = mat) + wt <- waldTest(model, list(cc))$Contrasts + pval <- wt$"P-Value" + add <- matrix(0, nrow = nrow(pvt), ncol = nrow(pvt)) + add[lower.tri(add)] <- stats::p.adjust(pval, method = fw.method) + add <- add + t(add) + # add <- add[ord, ord] + dimnames(add)[[2]] <- apply(pvt[,terms], 1, function(el) paste(el, collapse = ":")) + paste(as.character(pvt[[terms[[1]]]]), as.character(pvt[[terms[2]]]), sep = ":") + pvs[[k]] <- cbind.data.frame(pvs[[k]][,1:(length(terms) + 2)], add) + } + pv <- pvs + } else stop("Please use one of the allowable types, \"HSD\",\"LSD\",\"PVAL\"") + pv } diff --git a/R/heritability.R b/R/heritability.R index a32c77d..3d17fc5 100644 --- a/R/heritability.R +++ b/R/heritability.R @@ -1,18 +1,84 @@ -#' Heritability for multi/single environment trials +#' Estimate heritability from an `asreml` model #' -#' @param model -#' @param term -#' @param ... +#' Computes Cullis-style heritability from a fitted \code{asreml} model using +#' predicted genetic effects and their associated standard errors of difference +#' (SEDs). +#' +#' The function supports both: +#' \itemize{ +#' \item single-term models, such as \code{"Genotype"}, and +#' \item interaction terms for multi-environment trials, such as +#' \code{"SYear:Genotype"}. +#' } +#' +#' For interaction terms, heritability is calculated separately for each level +#' of the first factor in \code{term} (for example, each site or site-year), +#' using the diagonal genetic variances extracted from the fitted covariance +#' structure. +#' +#' The heritability is computed as: +#' \deqn{ +#' H^2 = 1 - \frac{\bar{SED}^2}{2\sigma_g^2} +#' } +#' where \eqn{\bar{SED}} is the average standard error of difference between +#' genotype predictions, and \eqn{\sigma_g^2} is the corresponding genetic +#' variance. +#' +#' @param model A fitted \code{asreml} model object. +#' @param term A character string giving the random term for which heritability +#' is to be calculated. This can be a single term such as \code{"Genotype"}, +#' or an interaction term such as \code{"SYear:Genotype"}. +#' @param ... Additional arguments passed to \code{predict.asreml()}. +#' +#' @details +#' For interaction terms, the function attempts to identify the corresponding +#' random-effect structure in the fitted model and currently supports: +#' \code{fa}, \code{diag}, \code{corh}, \code{corgh}, and \code{us}. +#' +#' If \code{term} is an interaction term, the function: +#' \enumerate{ +#' \item obtains predicted values for the interaction, +#' \item extracts the SED matrix among genotype predictions within each level +#' of the first factor in \code{term}, +#' \item computes the average pairwise SED for each level, and +#' \item combines this with the corresponding genetic variance to calculate +#' heritability. +#' } +#' +#' For a single term, a single heritability estimate is returned. #' #' @return -#' @export +#' A numeric vector of heritability estimates. +#' \itemize{ +#' \item If \code{term} is a single factor, a length-1 named vector is +#' returned. +#' \item If \code{term} is an interaction term, a named vector is returned +#' with one heritability estimate for each level of the first factor in +#' \code{term}. +#' } +#' +#' @note +#' This function is intended for use with \code{asreml} models fitted with +#' supported variance structures. For factor-analytic models, it relies on +#' \code{ASExtras4::fa.asreml()} to extract the genetic variance matrix. +#' +#' The function assumes that the first component of an interaction term +#' corresponds to the environment-like factor (for example, site, year, or +#' site-year), and the second corresponds to genotype. +#' +#' @seealso +#' \code{\link[asreml]{predict.asreml}}, +#' \code{ASExtras4::fa.asreml} +#' +#'@export #' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' TODO #' } +#' herit.asreml <- function(model, term = "SYear:Genotype", ...){ - dat <- eval(model$call$data) + dat <- eval(model$call$data) # Unused? if(length(grep(":", term))){ terms <- all.vars(as.formula(paste("~ ", term, sep = ""))) labs <- attr(terms(as.formula(model$call$random)), "term.labels") diff --git a/R/hsd.R b/R/hsd.R index 1879f02..b60282a 100644 --- a/R/hsd.R +++ b/R/hsd.R @@ -1,18 +1,68 @@ -#' Title +#' Compute Tukey-style HSD thresholds from `asreml` predictions #' -#' @param model -#' @param term -#' @param by -#' @param omit.string -#' @param ... +#' Computes a Tukey-style honestly significant difference (HSD) threshold from +#' predicted values obtained from a fitted \code{asreml} model, using the +#' associated standard errors of difference (SEDs). +#' +#' The function can calculate: +#' \itemize{ +#' \item a single HSD threshold across all predicted values, or +#' \item separate HSD thresholds within groups defined by \code{by}. +#' } +#' +#' This is useful for comparing predicted means or BLUPs from plant breeding +#' models, especially when predictions are made for interaction terms such as +#' \code{Treatment:Genotype}. +#' +#' @param model A fitted \code{asreml} model object. +#' @param term A character string specifying the term to be predicted and +#' compared, for example \code{"Treatment:Genotype"}. +#' @param by An optional character string specifying the factor(s) within which +#' multiple comparisons are to be made. This must be a subset of the factors +#' in \code{term}. For example, if \code{term = "Treatment:Genotype"}, setting +#' \code{by = "Treatment"} computes HSD values separately within each +#' treatment. +#' If \code{NULL}, a single HSD is computed across all predictions. +#' @param omit.string An optional character string used to omit rows whose +#' comparison factor matches the supplied pattern. +#' @param ... Additional arguments passed to \code{predict.asreml()}. +#' +#' @details +#' The function first obtains predicted values and their SED matrix using +#' \code{predict.asreml()}. It then calculates an average SED, either: +#' \itemize{ +#' \item across all predictions, or +#' \item within each section defined by \code{by}. +#' } +#' +#' The HSD threshold is computed as: +#' \deqn{ +#' HSD = \frac{\bar{SED}}{\sqrt{2}} \times q +#' } +#' where \eqn{\bar{SED}} is the average pairwise SED and \eqn{q} is the Tukey +#' critical value from \code{qtukey()} using the model denominator degrees of +#' freedom. +#' +#' The returned object is the prediction table with an added \code{HSD} column. #' #' @return +#' A data frame of predicted values returned by \code{predict.asreml()}, +#' with an additional column: +#' \describe{ +#' \item{HSD}{The Tukey-style honestly significant difference threshold, +#' either global or section-specific depending on \code{by}.} +#' } #' @export #' +#' @seealso +#' \code{\link[asreml]{predict.asreml}}, +#' \code{\link[stats]{qtukey}} +#' #' @examples #' \dontrun{ -#' JULES COMPLETE +#' TODO #' } +#' hsd <- function(model, term = "Treatment:Genotype", by = "Treatment", omit.string = NULL, ...) { pred <- predict(model, classify = term, sed = TRUE, ...) pv <- pred$pvals diff --git a/R/prune.R b/R/prune.R index 0209ef1..9d015a3 100644 --- a/R/prune.R +++ b/R/prune.R @@ -30,15 +30,15 @@ #' #' @examples #' \dontrun{ -ped <- data.frame( - id = c("A","B","C","D"), - dam = c(NA, NA, "A", "A"), - sire = c(NA, NA, "B", "B") -) - -data <- data.frame(id = c("C","D")) - -prune(ped, data) +#' ped <- data.frame( +#' id = c("A","B","C","D"), +#' dam = c(NA, NA, "A", "A"), +#' sire = c(NA, NA, "B", "B") +#' ) +#' +#' data <- data.frame(id = c("C","D")) +#' +#' prune(ped, data) #' } prune <- function(ped, data, gen = NULL) { @@ -60,7 +60,7 @@ prune <- function(ped, data, gen = NULL) return(ped[what,]) } -#' Count generations in a pedigree +#' Count generations in a pedigree (Does this makes sense for plant breeding??) #' #' Computes the generation number for each individual in a pedigree structure. #' Founders (individuals with no recorded parents) are assigned generation 1, diff --git a/R/regresion_functions.R b/R/regresion_functions.R index 86bd4e2..48c2433 100644 --- a/R/regresion_functions.R +++ b/R/regresion_functions.R @@ -1,20 +1,78 @@ -#' Conversion function for Efficiency and Responsiveness BLUPs in Treatment x Site x Variety experiments +#' Convert treatment-specific BLUPs into efficiency and responsiveness #' -#' The function assumes you have a Treatment x Site factor that is a composite of treatments and sites. The function requires no specific ordering of the factor levels. +#' Re-parameterizes treatment-by-site-by-variety BLUPs from a fitted +#' \code{asreml} model into an efficiency/responsiveness representation. #' -#' @param model An `asreml` object. The final full Treatment x Site x Variety model -#' @param Env Treatment x Site x Variety term as a character. -#' @param levs Named treatment levels used in transformation. e.g. `c("Treat1", "Treat2")` would regress Treat2 on Treat1 -#' @param sep separator used for Treat x Site names (if multi-x model), if not present assumes single section -#' @param ... Other parameters passed to [asreml::predict.asreml()]. +#' The function assumes that the environment term supplied in \code{Env} +#' represents a treatment-by-site factor crossed with variety, for example +#' \code{"TSite:Variety"}, where levels of \code{TSite} combine treatment and +#' site information. +#' +#' Given two treatment levels in \code{levs}, the function treats the first as +#' a baseline ("efficiency") component and expresses the second as a +#' responsiveness component after regression on the first. +#' +#' @param model A fitted \code{asreml} model object containing the full +#' treatment-by-site-by-variety structure. +#' @param Env A character string giving the environment-by-variety term to be +#' transformed, for example \code{"TSite:Variety"}. +#' @param levs A character vector of length 2 giving the treatment levels used +#' in the transformation. The second treatment is regressed on the first. +#' @param sep A character string giving the separator used in composite +#' treatment-by-site level names. If no separator is present, the function +#' assumes a single section. +#' @param pev Logical; if \code{TRUE}, use the transformed prediction error +#' variance matrix. If \code{FALSE}, subtract the transformed prediction error +#' variance from the corresponding genetic variance structure. +#' @param ... Additional arguments passed to \code{predict.asreml()}. +#' +#' @details +#' For each site (or section), the function extracts the \eqn{2 \times 2} +#' covariance matrix for the two treatment levels and computes: +#' \deqn{ +#' \beta = \frac{\mathrm{Cov}(T_1, T_2)}{\mathrm{Var}(T_1)} +#' } +#' and the responsiveness variance: +#' \deqn{ +#' \sigma_r^2 = \mathrm{Var}(T_2)(1 - \rho^2), +#' } +#' where \eqn{\rho} is the correlation between the two treatment effects. +#' +#' The transformed responsiveness BLUP is then: +#' \deqn{ +#' b_{\mathrm{resp}} = b_{T_2} - \beta b_{T_1}. +#' } +#' +#' The function also returns the transformed covariance matrix +#' \eqn{G_{\mathrm{trans}} = T G T^\top} corresponding to the +#' efficiency/responsiveness parameterization. #' #' @return -#' @export +#' A list with components: +#' \describe{ +#' \item{blups}{A data frame containing site, variety, BLUPs for the two +#' specified treatment levels, the derived responsiveness value, and an HSD +#' summary where available.} +#' \item{TGmat}{The transformed covariance matrix under the +#' efficiency/responsiveness parameterization.} +#' \item{Gmat}{The original covariance matrix for the supplied environment +#' term.} +#' \item{beta}{Regression coefficients used to regress the second treatment on +#' the first within each site.} +#' \item{sigr}{Responsiveness variances within each site.} +#' \item{tmat}{The linear transformation matrix applied to \code{Gmat}.} +#' } +#' +#' @note +#' This function assumes exactly two treatment levels in \code{levs}. It is +#' primarily intended for treatment-by-site composite factors where the first +#' part or second part of the composite level name identifies treatment. #' #' @examples #' \dontrun{ -#' JULES COMPLETE -#' } +#' TODO +#'} +#' @export randomRegress <- function(model, Env = "TSite:Variety", levs = NULL, sep = "-", pev = TRUE, ...){ if(is.null(levs)) stop("Treatment levels cannnot be NULL.") @@ -99,8 +157,82 @@ randomRegress <- function(model, Env = "TSite:Variety", levs = NULL, sep = "-", list(blups = blups, TGmat = TGmat, Gmat = Gmat, beta = beta, sigr = sigr, tmat = tmat) } -## BLUEs regression +#' Compute a fixed-effect responsiveness index from predicted values +#' +#' Forms a regression-based responsiveness index from predicted values obtained +#' from a fitted \code{asreml} model. +#' +#' The function identifies two treatment levels within a prediction term and +#' compares matched predictions across a remaining regression variable +#' (for example genotype). The resulting responsiveness index is computed either +#' as: +#' \itemize{ +#' \item residuals from a simple linear regression of the second treatment on +#' the first, or +#' \item model-based conditional residuals using the prediction covariance +#' matrix. +#' } +#' +#' @param model A fitted \code{asreml} model object. +#' @param term A character string specifying the prediction term, for example +#' \code{"Treatment:Genotype"} or \code{"Treatment:Site:Genotype"}. +#' @param by An optional character string specifying variables used to split the +#' analysis into sections. These variables must be contained in \code{term}. +#' @param levs A character vector of length 2 giving the treatment levels to be +#' compared. +#' @param simple Logical; if \code{TRUE}, compute responsiveness as residuals +#' from a simple linear regression of treatment 2 on treatment 1. If +#' \code{FALSE}, compute responsiveness using the model-based prediction +#' covariance matrix. +#' +#' @details +#' The function first predicts the full \code{term} using +#' \code{predict.asreml(..., vcov = TRUE)}. It then identifies: +#' \itemize{ +#' \item the factor containing the treatment levels in \code{levs}, +#' \item optional grouping variables in \code{by}, and +#' \item the remaining variable(s) used to match observations across the two +#' treatment levels. +#' } +#' +#' For each split defined by \code{by}, matched predictions are extracted for +#' the two treatment levels. If \code{simple = TRUE}, a linear regression +#' \eqn{y_2 \sim y_1} is fitted and the residuals are returned as the +#' responsiveness index. If \code{simple = FALSE}, the responsiveness index is +#' computed from the conditional mean structure implied by the prediction +#' covariance matrix. +#' +#' The function also reports standard errors, an average SED, and a Tukey-style +#' HSD summary for the responsiveness index. +#' +#' @return +#' A data frame containing: +#' \describe{ +#' \item{Split}{The grouping level defined by \code{by}, or a default label if +#' no grouping is used.} +#' \item{Regress.Var}{The matching regression unit, for example genotype.} +#' \item{\code{levs[1]}}{Predicted value under the first treatment level.} +#' \item{\code{levs[2]}}{Predicted value under the second treatment level.} +#' \item{reponse.index}{The derived responsiveness index.} +#' \item{std.error}{Standard error of the responsiveness index.} +#' \item{HSD}{A Tukey-style HSD summary based on the average pairwise SED.} +#' \item{sed}{Average pairwise SED of the responsiveness index.} +#' } +#' +#' @note +#' This function assumes exactly two treatment levels in \code{levs}. It also +#' assumes that matched observations across treatments can be identified using +#' the remaining variable(s) in \code{term} after removing the treatment factor +#' and any grouping variables in \code{by}. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' TODO +#' } +#' fixedRegress <- function(model, term = "Treatment:Genotype", by = NULL, levs = NULL, simple = TRUE){ pterm <- term if(is.null(levs)) @@ -185,137 +317,3 @@ fixedRegress <- function(model, term = "Treatment:Genotype", by = NULL, levs = N resp.list <- resp.list[!sapply(resp.list, is.null)] do.call("rbind.data.frame", resp.list) } - -compare <- function(model, term = "Treatment:Genotype", by = NULL, omit.string = NULL, type = "HSD", pev = TRUE, fw.method = "none", ...){ - pred <- predict(model, classify = term, vcov = TRUE, ...) - terms <- unlist(strsplit(term, ":")) - pv <- pred$pvals - inds <- !is.na(pv$predicted.value) - if(!pev & all(terms %in% all.vars(model$call$random))){ - varm <- summary(model, vparameters = TRUE)$vparameters[[term]] - if(length(terms) > 1) - len <- table(pv[,1])[1] - else len <- nrow(pv) - vara <- kronecker(varm, diag(len)) - pred$vcov - vara[inds, inds] - } else vara <- pred$vcov[inds, inds] - pv <- pv[inds,] - section <- FALSE - if(!is.null(by)){ - bys <- unlist(strsplit(by, ":")) - if(all(terms %in% bys)) - stop("Argument \"by\" indicates no multiple comparisons are being made.") - if(!all(bys %in% terms)) - stop("Some terms in argument \"by\" are not in \"term\".") - if(length(bys) > 1) - pv[[by]] <- apply(pv[,bys], 1, function(el) paste(el, collapse = ":")) - } else{ - by <- term - pv[[by]] <- by - } - if(!is.null(omit.string)){ - oind <- grep(omit.string, as.character(pv[[gnam]])) - if(length(oind)){ - pv <- pv[-oind,] - sed <- sed[-oind,-oind] - } - } - sst <- as.character(pv[[by]]) - um <- unique(sst) - if(type %in% c("HSD","LSD")){ - tsd <- avsed <- c() - for(k in 1:length(um)){ - sinds <- sst %in% um[k] - svar <- vara[sinds, sinds] - avsed[k] <- sqrt(mean(apply(combn(diag(svar), 2), 2, sum) - 2*svar[lower.tri(svar)])) - if(type == "HSD") - tsd[k] <- (avsed[k]/sqrt(2))*qtukey(0.95, length(sinds), model$nedf) - else tsd[k] <- avsed[k]*qt(0.025, df = model$nedf, lower.tail = FALSE) - } - pv <- cbind.data.frame(pv[,1:(length(terms) + 2)]) - pv[[type]] <- rep(tsd, times = table(sst)) - pv[["sed"]] <- rep(avsed, times = table(sst)) - } - else if(type %in% "PVAL"){ - pvs <- split(pv, pv[[by]]) - yvar <- deparse(model$call$fixed[[2]]) - xvar <- labels(terms(as.formula(model$call$fixed))) - fix.form <- as.formula(paste(yvar, " ~ ", xvar[length(xvar)], " - 1", sep = "")) - model <- update(model, fixed. = fix.form, Cfixed = TRUE) - coefs <- model$coefficients$fixed - cinds <- grep(paste(terms, collapse = ".*"), rownames(coefs)) - coefs <- coefs[cinds,,drop = FALSE] - for(k in 1:length(um)){ - umt <- paste(strsplit(um[k], ":")[[1]], collapse = ".*") - sind <- cinds[grep(umt, rownames(coefs))] - scf <- coefs[grep(umt, rownames(coefs)),] - sna <- scf == 0 - aind <- sind[!sna] - pvt <- pvs[[k]] - cb <- t(combn(nrow(pvt), 2)) - mat <- matrix(0, nrow = nrow(cb), ncol = nrow(pvt)) - mat[cbind(1:nrow(mat), cb[,1])] <- 1 - mat[cbind(1:nrow(mat), cb[,2])] <- -1 - cc <- list(coef = aind, type = "con", comp = mat) - wt <- waldTest(model, list(cc))$Contrasts - pval <- wt$"P-Value" - add <- matrix(0, nrow = nrow(pvt), ncol = nrow(pvt)) - add[lower.tri(add)] <- stats::p.adjust(pval, method = fw.method) - add <- add + t(add) - # add <- add[ord, ord] - dimnames(add)[[2]] <- apply(pvt[,terms], 1, function(el) paste(el, collapse = ":")) - paste(as.character(pvt[[terms[[1]]]]), as.character(pvt[[terms[2]]]), sep = ":") - pvs[[k]] <- cbind.data.frame(pvs[[k]][,1:(length(terms) + 2)], add) - } - pv <- pvs - } else stop("Please use one of the allowable types, \"HSD\",\"LSD\",\"PVAL\"") - pv -} - -## BLUEs regression - -#' Fixed regression for doing stuff -#' -#' @param model -#' @param term -#' @param levs -#' @param robust -#' -#' @return -#' @export -#' -#' @examples -#' \dontrun{ -#' JULES COMPLETE -#' } -fixedRegress <- function(model, term = "Treatment:Genotype", levs = c("9 cm", "Control"), robust = TRUE) { - pred <- predict(model, classify = term, vcov = TRUE) - terms <- unlist(strsplit(term, ":")) - tnam <- terms[1] - gnam <- terms[2] - wt1 <- pred$pvals[[tnam]] %in% levs[1] - wt2 <- pred$pvals[[tnam]] %in% levs[2] - ptreat <- pred$pvals$predicted.value[wt1] - pcont <- pred$pvals$predicted.value[wt2] - vc <- as.matrix(pred$vcov) - s22 <- vc[wt2, wt2] - if (robust) { - s11 <- vc[wt1, wt1] - s12 <- vc[wt1, wt2] - resp <- ptreat - s12 %*% solve(s22) %*% pcont - resp.var <- s11 - s12 %*% solve(s22) %*% t(s12) - rdf <- model$nedf - } else { - lmr <- lm(ptreat ~ pcont) - resp <- lmr$residuals - xm <- model.matrix(~pcont) - vmat <- (diag(length(ptreat)) - xm %*% solve(t(xm) %*% xm) %*% t(xm)) - resp.var <- ((vmat) %*% t(vmat)) * (summary(lmr)$sigma^2) - rdf <- lmr$df.residual - } - std.error <- sqrt(diag(resp.var)) - sed <- sqrt(apply(combn(diag(resp.var), 2), 2, sum) - 2 * resp.var[lower.tri(resp.var)]) - respd <- cbind.data.frame(Genotype = levels(pred$pvals[[gnam]]), reponse.index = resp, std.error = std.error) - respd$HSD <- (mean(sed) / sqrt(2)) * qtukey(0.95, length(ptreat), df = rdf) - respd -} diff --git a/R/sunrise_sunset_times.R b/R/sunrise_sunset_times.R index f45a762..8b69535 100644 --- a/R/sunrise_sunset_times.R +++ b/R/sunrise_sunset_times.R @@ -78,10 +78,12 @@ sunrise_time <- function(datetime, latitude, longitude) { #' #' @export #' -#' @example +#' @examples +#' \dontrun{ #' library(lubridate) #' date <- as_datetime("2020-01-01", tz = "Australia/Adelaide") #' sunset_time(date, -35.69167, 136.9650) +#' } sunset_time <- function(datetime, latitude, longitude) { coordinates <- sp::SpatialPoints( matrix(c(longitude, latitude), ncol = 2), diff --git a/man/compare.Rd b/man/compare.Rd index 95f1558..776715e 100644 --- a/man/compare.Rd +++ b/man/compare.Rd @@ -56,6 +56,6 @@ rows/columns are removed from the SED matrix. } \examples{ \dontrun{ -JULES COMPLETE +TODO } } diff --git a/man/countGen.Rd b/man/countGen.Rd new file mode 100644 index 0000000..d871dac --- /dev/null +++ b/man/countGen.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prune.R +\name{countGen} +\alias{countGen} +\title{Count generations in a pedigree} +\usage{ +countGen(ped) +} +\arguments{ +\item{ped}{A data frame or matrix with columns: +\describe{ +\item{id}{Individual index} +\item{dam}{Index of the dam (mother), or 0 if unknown} +\item{sire}{Index of the sire (father), or 0 if unknown} +}} +} +\value{ +A numeric vector giving the generation number for each individual. +} +\description{ +Computes the generation number for each individual in a pedigree structure. +Founders (individuals with no recorded parents) are assigned generation 1, +and descendants are assigned one plus the maximum generation of their +parents. +} +\details{ +The function iteratively propagates generation numbers through the pedigree +until all individuals have been assigned a generation level. +} +\keyword{internal} diff --git a/man/fixedRegress.Rd b/man/fixedRegress.Rd index 53ab6af..99eddf2 100644 --- a/man/fixedRegress.Rd +++ b/man/fixedRegress.Rd @@ -2,23 +2,91 @@ % Please edit documentation in R/regresion_functions.R \name{fixedRegress} \alias{fixedRegress} -\title{Fixed regression for doing stuff} +\title{Compute a fixed-effect responsiveness index from predicted values} \usage{ fixedRegress( model, term = "Treatment:Genotype", - levs = c("9 cm", "Control"), - robust = TRUE + by = NULL, + levs = NULL, + simple = TRUE ) } \arguments{ -\item{robust}{} +\item{model}{A fitted \code{asreml} model object.} + +\item{term}{A character string specifying the prediction term, for example +\code{"Treatment:Genotype"} or \code{"Treatment:Site:Genotype"}.} + +\item{by}{An optional character string specifying variables used to split the +analysis into sections. These variables must be contained in \code{term}.} + +\item{levs}{A character vector of length 2 giving the treatment levels to be +compared.} + +\item{simple}{Logical; if \code{TRUE}, compute responsiveness as residuals +from a simple linear regression of treatment 2 on treatment 1. If +\code{FALSE}, compute responsiveness using the model-based prediction +covariance matrix.} +} +\value{ +A data frame containing: +\describe{ +\item{Split}{The grouping level defined by \code{by}, or a default label if +no grouping is used.} +\item{Regress.Var}{The matching regression unit, for example genotype.} +\item{\code{levs[1]}}{Predicted value under the first treatment level.} +\item{\code{levs[2]}}{Predicted value under the second treatment level.} +\item{reponse.index}{The derived responsiveness index.} +\item{std.error}{Standard error of the responsiveness index.} +\item{HSD}{A Tukey-style HSD summary based on the average pairwise SED.} +\item{sed}{Average pairwise SED of the responsiveness index.} +} } \description{ -Fixed regression for doing stuff +Forms a regression-based responsiveness index from predicted values obtained +from a fitted \code{asreml} model. +} +\details{ +The function identifies two treatment levels within a prediction term and +compares matched predictions across a remaining regression variable +(for example genotype). The resulting responsiveness index is computed either +as: +\itemize{ +\item residuals from a simple linear regression of the second treatment on +the first, or +\item model-based conditional residuals using the prediction covariance +matrix. +} + +The function first predicts the full \code{term} using +\code{predict.asreml(..., vcov = TRUE)}. It then identifies: +\itemize{ +\item the factor containing the treatment levels in \code{levs}, +\item optional grouping variables in \code{by}, and +\item the remaining variable(s) used to match observations across the two +treatment levels. +} + +For each split defined by \code{by}, matched predictions are extracted for +the two treatment levels. If \code{simple = TRUE}, a linear regression +\eqn{y_2 \sim y_1} is fitted and the residuals are returned as the +responsiveness index. If \code{simple = FALSE}, the responsiveness index is +computed from the conditional mean structure implied by the prediction +covariance matrix. + +The function also reports standard errors, an average SED, and a Tukey-style +HSD summary for the responsiveness index. +} +\note{ +This function assumes exactly two treatment levels in \code{levs}. It also +assumes that matched observations across treatments can be identified using +the remaining variable(s) in \code{term} after removing the treatment factor +and any grouping variables in \code{by}. } \examples{ \dontrun{ -JULES COMPLETE +TODO } + } diff --git a/man/herit.asreml.Rd b/man/herit.asreml.Rd index 88cabb8..3c2c9ae 100644 --- a/man/herit.asreml.Rd +++ b/man/herit.asreml.Rd @@ -2,18 +2,87 @@ % Please edit documentation in R/heritability.R \name{herit.asreml} \alias{herit.asreml} -\title{Heritability for multi/single environment trials} +\title{Estimate heritability from an \code{asreml} model} \usage{ herit.asreml(model, term = "SYear:Genotype", ...) } \arguments{ -\item{...}{} +\item{model}{A fitted \code{asreml} model object.} + +\item{term}{A character string giving the random term for which heritability +is to be calculated. This can be a single term such as \code{"Genotype"}, +or an interaction term such as \code{"SYear:Genotype"}.} + +\item{...}{Additional arguments passed to \code{predict.asreml()}.} +} +\value{ +A numeric vector of heritability estimates. +\itemize{ +\item If \code{term} is a single factor, a length-1 named vector is +returned. +\item If \code{term} is an interaction term, a named vector is returned +with one heritability estimate for each level of the first factor in +\code{term}. +} } \description{ -Heritability for multi/single environment trials +Computes Cullis-style heritability from a fitted \code{asreml} model using +predicted genetic effects and their associated standard errors of difference +(SEDs). +} +\details{ +The function supports both: +\itemize{ +\item single-term models, such as \code{"Genotype"}, and +\item interaction terms for multi-environment trials, such as +\code{"SYear:Genotype"}. +} + +For interaction terms, heritability is calculated separately for each level +of the first factor in \code{term} (for example, each site or site-year), +using the diagonal genetic variances extracted from the fitted covariance +structure. + +The heritability is computed as: +\deqn{ +H^2 = 1 - \frac{\bar{SED}^2}{2\sigma_g^2} +} +where \eqn{\bar{SED}} is the average standard error of difference between +genotype predictions, and \eqn{\sigma_g^2} is the corresponding genetic +variance. + +For interaction terms, the function attempts to identify the corresponding +random-effect structure in the fitted model and currently supports: +\code{fa}, \code{diag}, \code{corh}, \code{corgh}, and \code{us}. + +If \code{term} is an interaction term, the function: +\enumerate{ +\item obtains predicted values for the interaction, +\item extracts the SED matrix among genotype predictions within each level +of the first factor in \code{term}, +\item computes the average pairwise SED for each level, and +\item combines this with the corresponding genetic variance to calculate +heritability. +} + +For a single term, a single heritability estimate is returned. +} +\note{ +This function is intended for use with \code{asreml} models fitted with +supported variance structures. For factor-analytic models, it relies on +\code{ASExtras4::fa.asreml()} to extract the genetic variance matrix. + +The function assumes that the first component of an interaction term +corresponds to the environment-like factor (for example, site, year, or +site-year), and the second corresponds to genotype. } \examples{ \dontrun{ -JULES COMPLETE +TODO +} + } +\seealso{ +\code{\link[asreml]{predict.asreml}}, +\code{ASExtras4::fa.asreml} } diff --git a/man/hsd.Rd b/man/hsd.Rd index ccdce4f..c09414f 100644 --- a/man/hsd.Rd +++ b/man/hsd.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/hsd.R \name{hsd} \alias{hsd} -\title{Title} +\title{Compute Tukey-style HSD thresholds from \code{asreml} predictions} \usage{ hsd( model, @@ -13,13 +13,71 @@ hsd( ) } \arguments{ -\item{...}{} +\item{model}{A fitted \code{asreml} model object.} + +\item{term}{A character string specifying the term to be predicted and +compared, for example \code{"Treatment:Genotype"}.} + +\item{by}{An optional character string specifying the factor(s) within which +multiple comparisons are to be made. This must be a subset of the factors +in \code{term}. For example, if \code{term = "Treatment:Genotype"}, setting +\code{by = "Treatment"} computes HSD values separately within each +treatment. +If \code{NULL}, a single HSD is computed across all predictions.} + +\item{omit.string}{An optional character string used to omit rows whose +comparison factor matches the supplied pattern.} + +\item{...}{Additional arguments passed to \code{predict.asreml()}.} +} +\value{ +A data frame of predicted values returned by \code{predict.asreml()}, +with an additional column: +\describe{ +\item{HSD}{The Tukey-style honestly significant difference threshold, +either global or section-specific depending on \code{by}.} +} } \description{ -Title +Computes a Tukey-style honestly significant difference (HSD) threshold from +predicted values obtained from a fitted \code{asreml} model, using the +associated standard errors of difference (SEDs). +} +\details{ +The function can calculate: +\itemize{ +\item a single HSD threshold across all predicted values, or +\item separate HSD thresholds within groups defined by \code{by}. +} + +This is useful for comparing predicted means or BLUPs from plant breeding +models, especially when predictions are made for interaction terms such as +\code{Treatment:Genotype}. + +The function first obtains predicted values and their SED matrix using +\code{predict.asreml()}. It then calculates an average SED, either: +\itemize{ +\item across all predictions, or +\item within each section defined by \code{by}. +} + +The HSD threshold is computed as: +\deqn{ +HSD = \frac{\bar{SED}}{\sqrt{2}} \times q +} +where \eqn{\bar{SED}} is the average pairwise SED and \eqn{q} is the Tukey +critical value from \code{qtukey()} using the model denominator degrees of +freedom. + +The returned object is the prediction table with an added \code{HSD} column. } \examples{ \dontrun{ -JULES COMPLETE +TODO +} + } +\seealso{ +\code{\link[asreml]{predict.asreml}}, +\code{\link[stats]{qtukey}} } diff --git a/man/prune.Rd b/man/prune.Rd index 70acfe6..ea0216a 100644 --- a/man/prune.Rd +++ b/man/prune.Rd @@ -49,6 +49,3 @@ data <- data.frame(id = c("C","D")) prune(ped, data) } } -\references{ -TODO -} diff --git a/man/randomRegress.Rd b/man/randomRegress.Rd index 90d7a76..ad2d612 100644 --- a/man/randomRegress.Rd +++ b/man/randomRegress.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/regresion_functions.R \name{randomRegress} \alias{randomRegress} -\title{Conversion function for Efficiency and Responsiveness BLUPs in Treatment x Site x Variety experiments} +\title{Convert treatment-specific BLUPs into efficiency and responsiveness} \usage{ randomRegress( model, @@ -14,21 +14,82 @@ randomRegress( ) } \arguments{ -\item{model}{An \code{asreml} object. The final full Treatment x Site x Variety model} +\item{model}{A fitted \code{asreml} model object containing the full +treatment-by-site-by-variety structure.} -\item{Env}{Treatment x Site x Variety term as a character.} +\item{Env}{A character string giving the environment-by-variety term to be +transformed, for example \code{"TSite:Variety"}.} -\item{levs}{Named treatment levels used in transformation. e.g. \code{c("Treat1", "Treat2")} would regress Treat2 on Treat1} +\item{levs}{A character vector of length 2 giving the treatment levels used +in the transformation. The second treatment is regressed on the first.} -\item{sep}{separator used for Treat x Site names (if multi-x model), if not present assumes single section} +\item{sep}{A character string giving the separator used in composite +treatment-by-site level names. If no separator is present, the function +assumes a single section.} -\item{...}{Other parameters passed to \code{\link[asreml:predict.asreml]{asreml::predict.asreml()}}.} +\item{pev}{Logical; if \code{TRUE}, use the transformed prediction error +variance matrix. If \code{FALSE}, subtract the transformed prediction error +variance from the corresponding genetic variance structure.} + +\item{...}{Additional arguments passed to \code{predict.asreml()}.} +} +\value{ +A list with components: +\describe{ +\item{blups}{A data frame containing site, variety, BLUPs for the two +specified treatment levels, the derived responsiveness value, and an HSD +summary where available.} +\item{TGmat}{The transformed covariance matrix under the +efficiency/responsiveness parameterization.} +\item{Gmat}{The original covariance matrix for the supplied environment +term.} +\item{beta}{Regression coefficients used to regress the second treatment on +the first within each site.} +\item{sigr}{Responsiveness variances within each site.} +\item{tmat}{The linear transformation matrix applied to \code{Gmat}.} +} } \description{ -The function assumes you have a Treatment x Site factor that is a composite of treatments and sites. The function requires no specific ordering of the factor levels. +Re-parameterizes treatment-by-site-by-variety BLUPs from a fitted +\code{asreml} model into an efficiency/responsiveness representation. +} +\details{ +The function assumes that the environment term supplied in \code{Env} +represents a treatment-by-site factor crossed with variety, for example +\code{"TSite:Variety"}, where levels of \code{TSite} combine treatment and +site information. + +Given two treatment levels in \code{levs}, the function treats the first as +a baseline ("efficiency") component and expresses the second as a +responsiveness component after regression on the first. + +For each site (or section), the function extracts the \eqn{2 \times 2} +covariance matrix for the two treatment levels and computes: +\deqn{ +\beta = \frac{\mathrm{Cov}(T_1, T_2)}{\mathrm{Var}(T_1)} +} +and the responsiveness variance: +\deqn{ +\sigma_r^2 = \mathrm{Var}(T_2)(1 - \rho^2), +} +where \eqn{\rho} is the correlation between the two treatment effects. + +The transformed responsiveness BLUP is then: +\deqn{ +b_{\mathrm{resp}} = b_{T_2} - \beta b_{T_1}. +} + +The function also returns the transformed covariance matrix +\eqn{G_{\mathrm{trans}} = T G T^\top} corresponding to the +efficiency/responsiveness parameterization. +} +\note{ +This function assumes exactly two treatment levels in \code{levs}. It is +primarily intended for treatment-by-site composite factors where the first +part or second part of the composite level name identifies treatment. } \examples{ \dontrun{ -JULES COMPLETE +TODO } } diff --git a/man/sunset_time.Rd b/man/sunset_time.Rd index a36dc7f..f5b74a9 100644 --- a/man/sunset_time.Rd +++ b/man/sunset_time.Rd @@ -19,3 +19,10 @@ A POSIXct object for the local time of sunset. \description{ Return the time of sunset given the date and GPS coordinates. } +\examples{ +\dontrun{ +library(lubridate) +date <- as_datetime("2020-01-01", tz = "Australia/Adelaide") +sunset_time(date, -35.69167, 136.9650) +} +} diff --git a/tests/testthat/setup-install_asreml.R b/tests/testthat/setup-install_asreml.R index fcc36b5..62026a7 100644 --- a/tests/testthat/setup-install_asreml.R +++ b/tests/testthat/setup-install_asreml.R @@ -1,4 +1,3 @@ -testthat::skip("Skipping set-install_asreml tests during development") expect_file_2 <- function(fn, args, pat, dir = ".", missing = F) { x <- do.call(fn, args) if (!missing) { diff --git a/tests/testthat/teardown-install_asreml.R b/tests/testthat/teardown-install_asreml.R index da22797..62d1103 100644 --- a/tests/testthat/teardown-install_asreml.R +++ b/tests/testthat/teardown-install_asreml.R @@ -1,4 +1,3 @@ -testthat::skip("Skipping teardown-install_asreml tests during development") # Delete file if it exists if(length(list.files(pattern = "asreml+(([a-zA-Z0-9_.\\-])*)+(.zip|.tar.gz|.tgz)", recursive = T))>0) { file.remove(list.files(pattern = "asreml+(([a-zA-Z0-9_.\\-])*)+(.zip|.tar.gz|.tgz)", recursive = T)) diff --git a/tests/testthat/test-install_asreml.R b/tests/testthat/test-install_asreml.R index 64a3d9d..1e1f713 100644 --- a/tests/testthat/test-install_asreml.R +++ b/tests/testthat/test-install_asreml.R @@ -1,13 +1,15 @@ -testthat::skip("Skipping test-install_asreml tests during development") skip_if(R.version$status == "Under development (unstable)") +skip_if(getRversion() > "4.0", "install_asreml() URL table is outdated for newer R versions") test_that("Installation works", { expect_equal(install_asreml(), TRUE) }) test_that("Installation provides output on success", { - # skip_if(R.version$status == "Under development (unstable)") - if("asreml" %in% installed.packages()[,1]){remove.packages("asreml")} + if ("asreml" %in% rownames(installed.packages())) { + remove.packages("asreml") + } + expect_message(install_asreml(), "ASreml-R successfully installed!") }) From 5b78a0b78089ab4acf1d40caa2942d29212756a5 Mon Sep 17 00:00:00 2001 From: Jiazhe Lin Date: Sat, 14 Mar 2026 23:26:18 +1100 Subject: [PATCH 7/8] updated R-cmd-check.yaml to use newer GitHub Actions versions --- .github/workflows/R-CMD-check.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index fe6751c..4f12857 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -31,13 +31,13 @@ jobs: RSPM: ${{ matrix.config.rspm }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - - uses: r-lib/actions/setup-pandoc@master + - uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies run: | @@ -48,7 +48,7 @@ jobs: - name: Cache R packages if: runner.os != 'Windows' - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} @@ -76,7 +76,7 @@ jobs: - name: Upload check results if: failure() - uses: actions/upload-artifact@main + uses: actions/upload-artifact@v4 with: name: ${{ runner.os }}-r${{ matrix.config.r }}-results path: check From de447c1e6e73db1fa8bc1bb870d854049e20b467 Mon Sep 17 00:00:00 2001 From: Jiazhe Lin Date: Sat, 14 Mar 2026 23:39:23 +1100 Subject: [PATCH 8/8] Update `ASExtras4` dependency as Suggests --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 83a5570..157c983 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,6 @@ License: MIT + file LICENSE URL: https://biometryhub.github.io/BiometryTools/ BugReports: https://github.com/biometryhub/BiometryTools/issues Imports: - ASExtras4, crayon, ggplot2, glue, @@ -30,6 +29,7 @@ Imports: RColorBrewer, wgaim Suggests: + ASExtras4, lubridate, maptools, ncdf4,