From 2998d9bacbd3c24ce8bf8f56471652950f7cba2c Mon Sep 17 00:00:00 2001 From: Isis Arend da Silva Date: Fri, 6 Dec 2024 15:27:55 +1000 Subject: [PATCH] Add trans="cloglog" and trans="boxcox" Add option for back-transformation for "cloglog" link in binomial regression and for other type of boxcox transformation than the one in the documentation. --- R/mct.R | 1068 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 553 insertions(+), 515 deletions(-) diff --git a/R/mct.R b/R/mct.R index 4b8a7c1..85f3494 100644 --- a/R/mct.R +++ b/R/mct.R @@ -1,515 +1,553 @@ -#' Perform Multiple Comparison Tests on a statistical model -#' -#' A function for comparing and ranking predicted means with Tukey's Honest Significant Difference (HSD) Test. -#' -#' @param model.obj An ASReml-R or aov model object. Will likely also work with `lme` ([nlme::lme()]), `lmerMod` ([lme4::lmer()]) models as well. -#' @param classify Name of predictor variable as string. -#' @param sig The significance level, numeric between 0 and 1. Default is 0.05. -#' @param int.type The type of confidence interval to calculate. One of `ci`, `1se` or `2se`. Default is `ci`. -#' @param trans Transformation that was applied to the response variable. One of `log`, `sqrt`, `logit`, `power` or `inverse`. Default is `NA`. -#' @param offset Numeric offset applied to response variable prior to transformation. Default is `NA`. Use 0 if no offset was applied to the transformed data. See Details for more information. -#' @param power Numeric power applied to response variable with power transformation. Default is `NA`. See Details for more information. -#' @param decimals Controls rounding of decimal places in output. Default is 2 decimal places. -#' @param descending Logical (default `FALSE`). Order of the output sorted by the predicted value. If `TRUE`, largest will be first, through to smallest last. -#' @param plot Automatically produce a plot of the output of the multiple comparison test? Default is `FALSE`. This is maintained for backwards compatibility, but the preferred method now is to use `autoplot()`. See [biometryassist::autoplot.mct()] for more details. -#' @param label_height Height of the text labels above the upper error bar on the plot. Default is 0.1 (10%) of the difference between upper and lower error bars above the top error bar. -#' @param rotation Rotate the text output as Treatments within the plot. Allows for easier reading of long treatment labels. Number between 0 and 360 (inclusive) - default 0 -#' @param save Logical (default `FALSE`). Save the predicted values to a csv file? -#' @param savename A file name for the predicted values to be saved to. Default is `predicted_values`. -#' @param order Deprecated. Use `descending` instead. -#' @param pred Deprecated. Use `classify` instead. -#' @param pred.obj Deprecated. Predicted values are calculated within the function from version 1.0.1 onwards. -#' @param ... Other arguments passed through to `predict.asreml()`. -#' -#' @importFrom multcompView multcompLetters -#' @importFrom emmeans emmeans -#' @importFrom stats model.frame predict qtukey qt terms var -#' @importFrom utils packageVersion -#' -#' @details Some transformations require that data has a small offset applied, otherwise it will cause errors (for example taking a log of 0, or square root of negative values). In order to correctly reverse this offset, if the `trans` argument is supplied, an offset value must also be supplied. If there was no offset required for a transformation, then use a value of 0 for the `offset` argument. -#' -#' @return A list containing a data frame with predicted means, standard errors, confidence interval upper and lower bounds, and significant group allocations (named `predicted_values`), as well as a plot visually displaying the predicted values (named `predicted_plot`). If some of the predicted values are aliased, a warning is printed, and the aliased treatment levels are returned in the output (named `aliased`). -#' -#' @references Jørgensen, E. & Pedersen, A. R. (1997). How to Obtain Those Nasty Standard Errors From Transformed Data - and Why They Should Not Be Used. [](https://pure.au.dk/portal/en/publications/how-to-obtain-those-nasty-standard-errors-from-transformed-data--and-why-they-should-not-be-used(d649ca20-d15f-11db-8e26-000ea68e967b).html) -#' -#' @examples -#' # Fit aov model -#' model <- aov(Petal.Length ~ Species, data = iris) -#' -#' # Display the ANOVA table for the model -#' anova(model) -#' -#' # Determine ranking and groups according to Tukey's Test -#' pred.out <- multiple_comparisons(model, classify = "Species") -#' -#' # Display the predicted values table -#' pred.out -#' -#' # Show the predicted values plot -#' autoplot(pred.out, label_height = 0.5) -#' -#' -#' -#' \dontrun{ -#' # ASReml-R Example -#' library(asreml) -#' -#' #Fit ASReml Model -#' model.asr <- asreml(yield ~ Nitrogen + Variety + Nitrogen:Variety, -#' random = ~ Blocks + Blocks:Wplots, -#' residual = ~ units, -#' data = asreml::oats) -#' -#' wald(model.asr) #Nitrogen main effect significant -#' -#' #Determine ranking and groups according to Tukey's Test -#' pred.out <- multiple_comparisons(model.obj = model.asr, classify = "Nitrogen", -#' descending = TRUE, decimals = 5) -#' -#' pred.out -#' -#' # Example using a box-cox transformation -#' set.seed(42) # See the seed for reproducibility -#' resp <- rnorm(n = 50, 5, 1)^3 -#' trt <- as.factor(sample(rep(LETTERS[1:10], 5), 50)) -#' block <- as.factor(rep(1:5, each = 10)) -#' ex_data <- data.frame(resp, trt, block) -#' -#' # Change one treatment random values to get significant difference -#' ex_data$resp[ex_data$trt=="A"] <- rnorm(n = 5, 7, 1)^3 -#' -#' model.asr <- asreml(resp ~ trt, -#' random = ~ block, -#' residual = ~ units, -#' data = ex_data) -#' -#' resplot(model.asr) -#' -#' # Perform Box-Cox transformation and get maximum value -#' out <- MASS::boxcox(ex_data$resp~ex_data$trt) -#' out$x[which.max(out$y)] # 0.3838 -#' -#' # Fit cube root to the data -#' model.asr <- asreml(resp^(1/3) ~ trt, -#' random = ~ block, -#' residual = ~ units, -#' data = ex_data) -#' resplot(model.asr) # residual plots look much better -#' -#' #Determine ranking and groups according to Tukey's Test -#' pred.out <- multiple_comparisons(model.obj = model.asr, -#' classify = "trt", -#' trans = "power", power = (1/3)) -#' -#' pred.out -#' autoplot(pred.out) -#' } -#' -#' @export -#' -multiple_comparisons <- function(model.obj, - classify, - sig = 0.05, - int.type = "ci", - trans = NA, - offset = NA, - power = NA, - decimals = 2, - descending = FALSE, - plot = FALSE, - label_height = 0.1, - rotation = 0, - save = FALSE, - savename = "predicted_values", - order, - pred.obj, - pred, - ...) { - - rlang::check_dots_used() - - if(!missing(pred)) { - warning("Argument `pred` has been deprecated and will be removed in a future version. Please use `classify` instead.") - classify <- pred - } - - if(!missing(order)) { - warning("Argument `order` has been deprecated and will be removed in a future version. Please use `descending` instead.") - } - - if(sig > 0.5) { - warning("Significance level given by `sig` is high. Perhaps you meant ", 1-sig, "?", call. = FALSE) - } - - # Get the individual names provided in classify - vars <- unlist(strsplit(classify, "\\:")) - reserved_col_names <- c("predicted.value", "std.error", "Df", - "groups", "PredictedValue", "ApproxSE", "ci", "low", "up") - if(any(vars %in% reserved_col_names)) { - stop("Invalid column name. Please change the name of column(s): ", vars[vars %in% reserved_col_names]) - } - - if(inherits(model.obj, "asreml")){ - - if(classify %!in% c(attr(stats::terms(model.obj$formulae$fixed), 'term.labels'), - attr(stats::terms(model.obj$formulae$random), 'term.labels'))) { - stop(classify, " is not a term in the model. Please check model specification.", call. = FALSE) - } - - if(!missing(pred.obj)) { - warning("Argument `pred.obj` has been deprecated and will be removed in a future version. Predictions are now performed internally in the function.") - } - - pred.obj <- quiet(asreml::predict.asreml(model.obj, classify = classify, sed = TRUE, trace = FALSE, ...)) - # Check if all the predicted values are NA. If so, suggests the need of the `present` argument - if(all(is.na(pred.obj$pvals$predicted.value)) & all(is.na(pred.obj$pvals$std.error))) { - stop("All predicted values are aliased. Perhaps you need the `present` argument?") - } - - # Check if any treatments are aliased, and remove them and print a warning - if(anyNA(pred.obj$pvals$predicted.value)) { - aliased <- which(is.na(pred.obj$pvals$predicted.value)) - # Get the level values of the aliased treatments - # If only one treatment (classify does not contain :) all levels printed separated with , - # If multiple treatments, first need to concatenate columns, then collapse rows - aliased_names <- pred.obj$pvals[aliased, !names(pred.obj$pvals) %in% c("predicted.value", "std.error", "status")] - - # This pastes rows of the dataframe together across the columns, and turns into a vector - if(is.data.frame(aliased_names)) { - aliased_names <- apply(aliased_names, 1, paste, collapse = ":") - } - - if(length(aliased_names) > 1) { - warn_string <- paste0("Some levels of ", classify, " are aliased. They have been removed from predicted output.\n Aliased levels are: ", paste(aliased_names, collapse = ", "), ".\n These levels are saved in the output object.") - } - else { - warn_string <- paste0("A level of ", classify, " is aliased. It has been removed from predicted output.\n Aliased level is: ", aliased_names, ".\n This level is saved as an attribute of the output object.") - } - - pred.obj$pvals <- pred.obj$pvals[!is.na(pred.obj$pvals$predicted.value),] - pred.obj$pvals <- droplevels(pred.obj$pvals) - pred.obj$sed <- pred.obj$sed[-aliased, -aliased] - warning(warn_string, call. = FALSE) - } - - #For use with asreml 4+ - if(utils::packageVersion("asreml") > "4") { - pp <- pred.obj$pvals - sed <- pred.obj$sed - } - - pp <- pp[!is.na(pp$predicted.value),] - pp$status <- NULL - - dat.ww <- quiet(asreml::wald(model.obj, ssType = "conditional", denDF = "default", trace = FALSE)$Wald) - - dendf <- data.frame(Source = row.names(dat.ww), denDF = dat.ww$denDF) - - ifelse(grepl(":", classify), - pp$Names <- apply(pp[,vars], 1, paste, collapse = "_"), - pp$Names <- pp[[classify]]) - - ndf <- dendf$denDF[grepl(classify, dendf$Source) & nchar(classify) == nchar(as.character(dendf$Source))] - if(rlang::is_empty(ndf)) { - ndf <- model.obj$nedf - rand_terms <- vars[vars %in% attr(stats::terms(model.obj$formulae$random), 'term.labels')] - warning(rand_terms, " is not a fixed term in the model. The denominator degrees of freedom are estimated using the residual degrees of freedom. This may be inaccurate.", call. = FALSE) - } - crit.val <- 1/sqrt(2)*stats::qtukey((1-sig), nrow(pp), ndf)*sed - - # Grab the response from the formula to create plot Y label - ylab <- model.obj$formulae$fixed[[2]] - } - - else if(inherits(model.obj, c("aov", "lm", "lmerMod", "lmerModLmerTest"))) { - if(classify %!in% attr(stats::terms(model.obj), 'term.labels')) { - stop(classify, " is not a term in the model. Please check model specification.", call. = FALSE) - } - - on.exit(options(emmeans = emmeans::emm_defaults)) - emmeans::emm_options("msg.interaction" = FALSE, "msg.nesting" = FALSE) - pred.out <- emmeans::emmeans(model.obj, as.formula(paste("~", classify))) - - sed <- pred.out@misc$sigma*sqrt(outer(1/pred.out@grid$.wgt., 1/pred.out@grid$.wgt., "+")) - pred.out <- as.data.frame(pred.out) - pred.out <- pred.out[,!grepl("CL", names(pred.out))] - pp <- pred.out - names(pp)[names(pp) == "emmean"] <- "predicted.value" - names(pp)[names(pp) == "SE"] <- "std.error" - - - # SED <- matrix(data = sed, nrow = nrow(pp), ncol = nrow(pp)) - diag(sed) <- NA - ifelse(grepl(":", classify), - pp$Names <- apply(pp[,vars], 1, paste, collapse = "_"), - pp$Names <- pp[[classify]]) - - if(anyNA(pp$predicted.value)) { - aliased <- which(is.na(pp$predicted.value)) - # Get the level values of the aliased treatments - # If only one treatment (classify does not contain :) all levels printed separated with , - # If multiple treatments, first need to concatenate columns, then collapse rows - aliased_names <- pp[aliased, !names(pp) %in% c("predicted.value", "std.error", "df", "Names")] - - # This pastes rows of the dataframe together across the columns, and turns into a vector - if(is.data.frame(aliased_names)) { - aliased_names <- apply(aliased_names, 1, paste, collapse = ":") - } - - if(length(aliased_names) > 1) { - warn_string <- paste0("Some levels of ", classify, " are aliased. They have been removed from predicted output.\n Aliased levels are: ", paste(aliased_names, collapse = ", "), ".\n These levels are saved in the output object.") - } - else { - warn_string <- paste0("A level of ", classify, " is aliased. It has been removed from predicted output.\n Aliased level is: ", aliased_names, ".\n This level is saved as an attribute of the output object.") - } - - pp <- pp[!is.na(pp$predicted.value),] - pp <- droplevels(pp) - sed <- sed[-aliased, -aliased] - warning(warn_string, call. = FALSE) - } - - ndf <- pp$df[1] - crit.val <- 1/sqrt(2)*stats::qtukey((1-sig), nrow(pp), ndf)*sed - - # Grab the response from the formula to create plot Y label - if(inherits(model.obj, c("lmerMod", "lmerModLmerTest"))) { - ylab <- model.obj@call[[2]][[2]] - } - else { - ylab <- model.obj$terms[[2]] - } - } - - else { - stop("Models of type ", class(model.obj), " are not supported.") - } - - # Check that the predicted levels don't contain a dash -, if they do replace and display warning - if(any(any(grepl("-", pp$Names) | grepl("-", pp[,1])))) { - levs <- unique(c(grep("-", pp[,1], value = TRUE), grep("-", pp$Names, value = TRUE))) - if(length(levs)>1) { - warning("The treatment levels ", paste(levs, collapse = ", "), " contained '-', which has been replaced in the final output with '_'") - } - else { - warning("The treatment level ", levs, " contained '-', which has been replaced in the final output with '_'") - } - pp[,1] <- gsub(pattern = "-", replacement = "_", pp[,1]) - pp$Names <- gsub(pattern = "-", replacement = "_", pp$Names) - } - - Names <- as.character(pp$Names) - - # Determine pairs that are significantly different - diffs <- abs(outer(pp$predicted.value, pp$predicted.value, "-")) > crit.val - # (diffs*sqrt(2))/sed # check against ptukey (use top/bottom triangle) - diffs <- diffs[lower.tri(diffs)] - - # Create a vector of treatment comparison names - m <- outer(pp$Names, pp$Names, paste, sep="-") - m <- m[lower.tri(m)] - - names(diffs) <- m - - ll <- multcompView::multcompLetters3("Names", "predicted.value", diffs, pp, reversed = !descending) - - rr <- data.frame(groups = ll$Letters) - rr$Names <- row.names(rr) - - pp.tab <- merge(pp,rr) - - if(!is.na(trans)){ - if(is.na(offset)) { - warning("Offset value assumed to be 0. Change with `offset` argument.") - offset <- 0 - } - - if(trans == "sqrt"){ - pp.tab$PredictedValue <- (pp.tab$predicted.value)^2 - ifelse(!is.na(offset), offset, 0) - pp.tab$ApproxSE <- 2*abs(pp.tab$std.error)*sqrt(pp.tab$PredictedValue) - if(int.type == "ci"){ - pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error - } - if(int.type == "1se"){ - pp.tab$ci <- pp.tab$std.error - } - if(int.type == "2se"){ - pp.tab$ci <- 2*pp.tab$std.error - } - pp.tab$low <- (pp.tab$predicted.value - pp.tab$ci)^2 - ifelse(!is.na(offset), offset, 0) - pp.tab$up <- (pp.tab$predicted.value + pp.tab$ci)^2 - ifelse(!is.na(offset), offset, 0) - } - - if(trans == "log"){ - pp.tab$PredictedValue <- exp(pp.tab$predicted.value) - ifelse(!is.na(offset), offset, 0) - pp.tab$ApproxSE <- abs(pp.tab$std.error)*pp.tab$PredictedValue - if(int.type == "ci"){ - pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error - } - if(int.type == "1se"){ - pp.tab$ci <- pp.tab$std.error - } - if(int.type == "2se"){ - pp.tab$ci <- 2*pp.tab$std.error - } - pp.tab$low <- exp(pp.tab$predicted.value - pp.tab$ci) - ifelse(!is.na(offset), offset, 0) - pp.tab$up <- exp(pp.tab$predicted.value + pp.tab$ci) - ifelse(!is.na(offset), offset, 0) - } - - if(trans == "logit"){ - pp.tab$PredictedValue <- exp(pp.tab$predicted.value)/(1 + exp(pp.tab$predicted.value)) - pp.tab$ApproxSE <- pp.tab$PredictedValue * (1 - pp.tab$PredictedValue)* abs(pp.tab$std.error) - if(int.type == "ci"){ - pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error - } - if(int.type == "1se"){ - pp.tab$ci <- pp.tab$std.error - } - if(int.type == "2se"){ - pp.tab$ci <- 2*pp.tab$std.error - } - pp.tab$ll <- pp.tab$predicted.value - pp.tab$ci - pp.tab$low <- exp(pp.tab$ll)/(1 + exp(pp.tab$ll)) - pp.tab$uu <- pp.tab$predicted.value + pp.tab$ci - pp.tab$up <- exp(pp.tab$uu)/(1 + exp(pp.tab$uu)) - - pp.tab$ll <- NULL - pp.tab$uu <- NULL - } - - if(trans == "power"){ - pp.tab$PredictedValue <- (pp.tab$predicted.value)^(1/power) - ifelse(!is.na(offset), offset, 0) - pp.tab$ApproxSE <- pp.tab$std.error*(1/(power*pp.tab$PredictedValue^(power-1))) - if(int.type == "ci"){ - pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error - } - if(int.type == "1se"){ - pp.tab$ci <- pp.tab$std.error - } - if(int.type == "2se"){ - pp.tab$ci <- 2*pp.tab$std.error - } - pp.tab$low <- (pp.tab$predicted.value - pp.tab$ci)^(1/power) - ifelse(!is.na(offset), offset, 0) - pp.tab$up <- (pp.tab$predicted.value + pp.tab$ci)^(1/power) - ifelse(!is.na(offset), offset, 0) - } - - if(trans == "inverse"){ - pp.tab$PredictedValue <- 1/pp.tab$predicted.value - pp.tab$ApproxSE <- abs(pp.tab$std.error)*pp.tab$PredictedValue^2 - if(int.type == "ci"){ - pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error - } - if(int.type == "1se"){ - pp.tab$ci <- pp.tab$std.error - } - if(int.type == "2se"){ - pp.tab$ci <- 2*pp.tab$std.error - } - pp.tab$low <- 1/(pp.tab$predicted.value - pp.tab$ci) - pp.tab$up <- 1/(pp.tab$predicted.value + pp.tab$ci) - } - } - - else { - - if(int.type == "ci"){ - pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error - } - if(int.type == "1se"){ - pp.tab$ci <- pp.tab$std.error - } - if(int.type == "2se"){ - pp.tab$ci <- 2*pp.tab$std.error - } - pp.tab$low <- pp.tab$predicted.value - pp.tab$ci - pp.tab$up <- pp.tab$predicted.value + pp.tab$ci - - } - - pp.tab <- pp.tab[base::order(pp.tab$predicted.value, decreasing = descending),] - - pp.tab$Names <- NULL - trtindex <- max(unlist(lapply(paste0("^", vars, "$"), grep, x = names(pp.tab)))) - - trtnam <- names(pp.tab)[1:trtindex] - # Exclude reserved column names - trtnam <- trtnam[trtnam %!in% c("predicted.value", "std.error", "Df", - "groups", "PredictedValue", "ApproxSE", "ci", "low", "up")] - - for(i in seq_along(trtnam)){ - pp.tab[[trtnam[i]]] <- factor(pp.tab[[trtnam[i]]], levels = unique(pp.tab[[trtnam[i]]])) - } - - # rounding to the correct number of decimal places - pp.tab <- rapply(object = pp.tab, f = round, classes = "numeric", how = "replace", digits = decimals) - - if(save) { - write.csv(pp.tab, file = paste0(savename, ".csv"), row.names = FALSE) - } - - # If there are brackets in the label, grab the text from inside - if(is.call(ylab)) { - ylab <- as.character(ylab)[2] - } - attr(pp.tab, "ylab") <- ylab - - if(length(vars)>2) { - classify3 <- vars[3] - } - else if(length(vars) > 1) { - classify2 <- vars[2] - classify <- vars[1] - } - - class(pp.tab) <- c("mct", class(pp.tab)) - if(plot) { - print(autoplot(pp.tab)) - } - - if(exists("aliased_names")) { - attr(pp.tab, 'aliased') <- as.character(aliased_names) - } - - # Add the critical value as an attribute - if(stats::var(as.vector(crit.val), na.rm = TRUE) < 1e-10) { - attr(pp.tab, 'HSD') <- crit.val[1,2] - } - else { - attr(pp.tab, 'HSD') <- crit.val - } - - rownames(pp.tab) <- NULL - - return(pp.tab) -} - - -#' Print output of multiple_comparisons -#' -#' @param x An mct object to print to the console. -#' @param ... Other arguments -#' -#' @return The original object invisibly. -#' @seealso [multiple_comparisons()] -#' @method print mct -#' @export -#' @examples -#' dat.aov <- aov(Petal.Width ~ Species, data = iris) -#' output <- multiple_comparisons(dat.aov, classify = "Species") -#' print(output) -print.mct <- function(x, ...) { - stopifnot(inherits(x, "mct")) - print.data.frame(x, ...) - - if(!is.null(attr(x, "aliased"))) { - aliased <- attr(x, "aliased") - if(length(aliased) > 1) { - cat("\nAliased levels are:", paste(aliased[1:(length(aliased)-1)], collapse = ", "), "and", aliased[length(aliased)], "\n") - } - else { - cat("\nAliased level is:", aliased, "\n") - } - } - invisible(x) -} - +#' Perform Multiple Comparison Tests on a statistical model +#' +#' A function for comparing and ranking predicted means with Tukey's Honest Significant Difference (HSD) Test. +#' +#' @param model.obj An ASReml-R or aov model object. Will likely also work with `lme` ([nlme::lme()]), `lmerMod` ([lme4::lmer()]) models as well. +#' @param classify Name of predictor variable as string. +#' @param sig The significance level, numeric between 0 and 1. Default is 0.05. +#' @param int.type The type of confidence interval to calculate. One of `ci`, `1se` or `2se`. Default is `ci`. +#' @param trans Transformation that was applied to the response variable. One of `log`, `sqrt`, `logit`, `power` or `inverse`. Default is `NA`. +#' @param offset Numeric offset applied to response variable prior to transformation. Default is `NA`. Use 0 if no offset was applied to the transformed data. See Details for more information. +#' @param power Numeric power applied to response variable with power transformation. Default is `NA`. See Details for more information. +#' @param decimals Controls rounding of decimal places in output. Default is 2 decimal places. +#' @param descending Logical (default `FALSE`). Order of the output sorted by the predicted value. If `TRUE`, largest will be first, through to smallest last. +#' @param plot Automatically produce a plot of the output of the multiple comparison test? Default is `FALSE`. This is maintained for backwards compatibility, but the preferred method now is to use `autoplot()`. See [biometryassist::autoplot.mct()] for more details. +#' @param label_height Height of the text labels above the upper error bar on the plot. Default is 0.1 (10%) of the difference between upper and lower error bars above the top error bar. +#' @param rotation Rotate the text output as Treatments within the plot. Allows for easier reading of long treatment labels. Number between 0 and 360 (inclusive) - default 0 +#' @param save Logical (default `FALSE`). Save the predicted values to a csv file? +#' @param savename A file name for the predicted values to be saved to. Default is `predicted_values`. +#' @param order Deprecated. Use `descending` instead. +#' @param pred Deprecated. Use `classify` instead. +#' @param pred.obj Deprecated. Predicted values are calculated within the function from version 1.0.1 onwards. +#' @param ... Other arguments passed through to `predict.asreml()`. +#' +#' @importFrom multcompView multcompLetters +#' @importFrom emmeans emmeans +#' @importFrom stats model.frame predict qtukey qt terms var +#' @importFrom utils packageVersion +#' +#' @details Some transformations require that data has a small offset applied, otherwise it will cause errors (for example taking a log of 0, or square root of negative values). In order to correctly reverse this offset, if the `trans` argument is supplied, an offset value must also be supplied. If there was no offset required for a transformation, then use a value of 0 for the `offset` argument. +#' +#' @return A list containing a data frame with predicted means, standard errors, confidence interval upper and lower bounds, and significant group allocations (named `predicted_values`), as well as a plot visually displaying the predicted values (named `predicted_plot`). If some of the predicted values are aliased, a warning is printed, and the aliased treatment levels are returned in the output (named `aliased`). +#' +#' @references Jørgensen, E. & Pedersen, A. R. (1997). How to Obtain Those Nasty Standard Errors From Transformed Data - and Why They Should Not Be Used. [](https://pure.au.dk/portal/en/publications/how-to-obtain-those-nasty-standard-errors-from-transformed-data--and-why-they-should-not-be-used(d649ca20-d15f-11db-8e26-000ea68e967b).html) +#' +#' @examples +#' # Fit aov model +#' model <- aov(Petal.Length ~ Species, data = iris) +#' +#' # Display the ANOVA table for the model +#' anova(model) +#' +#' # Determine ranking and groups according to Tukey's Test +#' pred.out <- multiple_comparisons(model, classify = "Species") +#' +#' # Display the predicted values table +#' pred.out +#' +#' # Show the predicted values plot +#' autoplot(pred.out, label_height = 0.5) +#' +#' +#' +#' \dontrun{ +#' # ASReml-R Example +#' library(asreml) +#' +#' #Fit ASReml Model +#' model.asr <- asreml(yield ~ Nitrogen + Variety + Nitrogen:Variety, +#' random = ~ Blocks + Blocks:Wplots, +#' residual = ~ units, +#' data = asreml::oats) +#' +#' wald(model.asr) #Nitrogen main effect significant +#' +#' #Determine ranking and groups according to Tukey's Test +#' pred.out <- multiple_comparisons(model.obj = model.asr, classify = "Nitrogen", +#' descending = TRUE, decimals = 5) +#' +#' pred.out +#' +#' # Example using a box-cox transformation +#' set.seed(42) # See the seed for reproducibility +#' resp <- rnorm(n = 50, 5, 1)^3 +#' trt <- as.factor(sample(rep(LETTERS[1:10], 5), 50)) +#' block <- as.factor(rep(1:5, each = 10)) +#' ex_data <- data.frame(resp, trt, block) +#' +#' # Change one treatment random values to get significant difference +#' ex_data$resp[ex_data$trt=="A"] <- rnorm(n = 5, 7, 1)^3 +#' +#' model.asr <- asreml(resp ~ trt, +#' random = ~ block, +#' residual = ~ units, +#' data = ex_data) +#' +#' resplot(model.asr) +#' +#' # Perform Box-Cox transformation and get maximum value +#' out <- MASS::boxcox(ex_data$resp~ex_data$trt) +#' out$x[which.max(out$y)] # 0.3838 +#' +#' # Fit cube root to the data +#' model.asr <- asreml(resp^(1/3) ~ trt, +#' random = ~ block, +#' residual = ~ units, +#' data = ex_data) +#' resplot(model.asr) # residual plots look much better +#' +#' #Determine ranking and groups according to Tukey's Test +#' pred.out <- multiple_comparisons(model.obj = model.asr, +#' classify = "trt", +#' trans = "power", power = (1/3)) +#' +#' pred.out +#' autoplot(pred.out) +#' } +#' +#' @export +#' +multiple_comparisons <- function(model.obj, + classify, + sig = 0.05, + int.type = "ci", + trans = NA, + offset = NA, + power = NA, + decimals = 2, + descending = FALSE, + plot = FALSE, + label_height = 0.1, + rotation = 0, + save = FALSE, + savename = "predicted_values", + order, + pred.obj, + pred, + ...) { + + rlang::check_dots_used() + + if(!missing(pred)) { + warning("Argument `pred` has been deprecated and will be removed in a future version. Please use `classify` instead.") + classify <- pred + } + + if(!missing(order)) { + warning("Argument `order` has been deprecated and will be removed in a future version. Please use `descending` instead.") + } + + if(sig > 0.5) { + warning("Significance level given by `sig` is high. Perhaps you meant ", 1-sig, "?", call. = FALSE) + } + + # Get the individual names provided in classify + vars <- unlist(strsplit(classify, "\\:")) + reserved_col_names <- c("predicted.value", "std.error", "Df", + "groups", "PredictedValue", "ApproxSE", "ci", "low", "up") + if(any(vars %in% reserved_col_names)) { + stop("Invalid column name. Please change the name of column(s): ", vars[vars %in% reserved_col_names]) + } + + if(inherits(model.obj, "asreml")){ + + if(classify %!in% c(attr(stats::terms(model.obj$formulae$fixed), 'term.labels'), + attr(stats::terms(model.obj$formulae$random), 'term.labels'))) { + stop(classify, " is not a term in the model. Please check model specification.", call. = FALSE) + } + + if(!missing(pred.obj)) { + warning("Argument `pred.obj` has been deprecated and will be removed in a future version. Predictions are now performed internally in the function.") + } + + pred.obj <- quiet(asreml::predict.asreml(model.obj, classify = classify, sed = TRUE, trace = FALSE, ...)) + # Check if all the predicted values are NA. If so, suggests the need of the `present` argument + if(all(is.na(pred.obj$pvals$predicted.value)) & all(is.na(pred.obj$pvals$std.error))) { + stop("All predicted values are aliased. Perhaps you need the `present` argument?") + } + + # Check if any treatments are aliased, and remove them and print a warning + if(anyNA(pred.obj$pvals$predicted.value)) { + aliased <- which(is.na(pred.obj$pvals$predicted.value)) + # Get the level values of the aliased treatments + # If only one treatment (classify does not contain :) all levels printed separated with , + # If multiple treatments, first need to concatenate columns, then collapse rows + aliased_names <- pred.obj$pvals[aliased, !names(pred.obj$pvals) %in% c("predicted.value", "std.error", "status")] + + # This pastes rows of the dataframe together across the columns, and turns into a vector + if(is.data.frame(aliased_names)) { + aliased_names <- apply(aliased_names, 1, paste, collapse = ":") + } + + if(length(aliased_names) > 1) { + warn_string <- paste0("Some levels of ", classify, " are aliased. They have been removed from predicted output.\n Aliased levels are: ", paste(aliased_names, collapse = ", "), ".\n These levels are saved in the output object.") + } + else { + warn_string <- paste0("A level of ", classify, " is aliased. It has been removed from predicted output.\n Aliased level is: ", aliased_names, ".\n This level is saved as an attribute of the output object.") + } + + pred.obj$pvals <- pred.obj$pvals[!is.na(pred.obj$pvals$predicted.value),] + pred.obj$pvals <- droplevels(pred.obj$pvals) + pred.obj$sed <- pred.obj$sed[-aliased, -aliased] + warning(warn_string, call. = FALSE) + } + + #For use with asreml 4+ + if(utils::packageVersion("asreml") > "4") { + pp <- pred.obj$pvals + sed <- pred.obj$sed + } + + pp <- pp[!is.na(pp$predicted.value),] + pp$status <- NULL + + dat.ww <- quiet(asreml::wald(model.obj, ssType = "conditional", denDF = "default", trace = FALSE)$Wald) + + dendf <- data.frame(Source = row.names(dat.ww), denDF = dat.ww$denDF) + + ifelse(grepl(":", classify), + pp$Names <- apply(pp[,vars], 1, paste, collapse = "_"), + pp$Names <- pp[[classify]]) + + ndf <- dendf$denDF[grepl(classify, dendf$Source) & nchar(classify) == nchar(as.character(dendf$Source))] + if(rlang::is_empty(ndf)) { + ndf <- model.obj$nedf + rand_terms <- vars[vars %in% attr(stats::terms(model.obj$formulae$random), 'term.labels')] + warning(rand_terms, " is not a fixed term in the model. The denominator degrees of freedom are estimated using the residual degrees of freedom. This may be inaccurate.", call. = FALSE) + } + crit.val <- 1/sqrt(2)*stats::qtukey((1-sig), nrow(pp), ndf)*sed + + # Grab the response from the formula to create plot Y label + ylab <- model.obj$formulae$fixed[[2]] + } + + else if(inherits(model.obj, c("aov", "lm", "lmerMod", "lmerModLmerTest"))) { + if(classify %!in% attr(stats::terms(model.obj), 'term.labels')) { + stop(classify, " is not a term in the model. Please check model specification.", call. = FALSE) + } + + on.exit(options(emmeans = emmeans::emm_defaults)) + emmeans::emm_options("msg.interaction" = FALSE, "msg.nesting" = FALSE) + pred.out <- emmeans::emmeans(model.obj, as.formula(paste("~", classify))) + + sed <- pred.out@misc$sigma*sqrt(outer(1/pred.out@grid$.wgt., 1/pred.out@grid$.wgt., "+")) + pred.out <- as.data.frame(pred.out) + pred.out <- pred.out[,!grepl("CL", names(pred.out))] + pp <- pred.out + names(pp)[names(pp) == "emmean"] <- "predicted.value" + names(pp)[names(pp) == "SE"] <- "std.error" + + + # SED <- matrix(data = sed, nrow = nrow(pp), ncol = nrow(pp)) + diag(sed) <- NA + ifelse(grepl(":", classify), + pp$Names <- apply(pp[,vars], 1, paste, collapse = "_"), + pp$Names <- pp[[classify]]) + + if(anyNA(pp$predicted.value)) { + aliased <- which(is.na(pp$predicted.value)) + # Get the level values of the aliased treatments + # If only one treatment (classify does not contain :) all levels printed separated with , + # If multiple treatments, first need to concatenate columns, then collapse rows + aliased_names <- pp[aliased, !names(pp) %in% c("predicted.value", "std.error", "df", "Names")] + + # This pastes rows of the dataframe together across the columns, and turns into a vector + if(is.data.frame(aliased_names)) { + aliased_names <- apply(aliased_names, 1, paste, collapse = ":") + } + + if(length(aliased_names) > 1) { + warn_string <- paste0("Some levels of ", classify, " are aliased. They have been removed from predicted output.\n Aliased levels are: ", paste(aliased_names, collapse = ", "), ".\n These levels are saved in the output object.") + } + else { + warn_string <- paste0("A level of ", classify, " is aliased. It has been removed from predicted output.\n Aliased level is: ", aliased_names, ".\n This level is saved as an attribute of the output object.") + } + + pp <- pp[!is.na(pp$predicted.value),] + pp <- droplevels(pp) + sed <- sed[-aliased, -aliased] + warning(warn_string, call. = FALSE) + } + + ndf <- pp$df[1] + crit.val <- 1/sqrt(2)*stats::qtukey((1-sig), nrow(pp), ndf)*sed + + # Grab the response from the formula to create plot Y label + if(inherits(model.obj, c("lmerMod", "lmerModLmerTest"))) { + ylab <- model.obj@call[[2]][[2]] + } + else { + ylab <- model.obj$terms[[2]] + } + } + + else { + stop("Models of type ", class(model.obj), " are not supported.") + } + + # Check that the predicted levels don't contain a dash -, if they do replace and display warning + if(any(any(grepl("-", pp$Names) | grepl("-", pp[,1])))) { + levs <- unique(c(grep("-", pp[,1], value = TRUE), grep("-", pp$Names, value = TRUE))) + if(length(levs)>1) { + warning("The treatment levels ", paste(levs, collapse = ", "), " contained '-', which has been replaced in the final output with '_'") + } + else { + warning("The treatment level ", levs, " contained '-', which has been replaced in the final output with '_'") + } + pp[,1] <- gsub(pattern = "-", replacement = "_", pp[,1]) + pp$Names <- gsub(pattern = "-", replacement = "_", pp$Names) + } + + Names <- as.character(pp$Names) + + # Determine pairs that are significantly different + diffs <- abs(outer(pp$predicted.value, pp$predicted.value, "-")) > crit.val + # (diffs*sqrt(2))/sed # check against ptukey (use top/bottom triangle) + diffs <- diffs[lower.tri(diffs)] + + # Create a vector of treatment comparison names + m <- outer(pp$Names, pp$Names, paste, sep="-") + m <- m[lower.tri(m)] + + names(diffs) <- m + + ll <- multcompView::multcompLetters3("Names", "predicted.value", diffs, pp, reversed = !descending) + + rr <- data.frame(groups = ll$Letters) + rr$Names <- row.names(rr) + + pp.tab <- merge(pp,rr) + + if(!is.na(trans)){ + if(is.na(offset)) { + warning("Offset value assumed to be 0. Change with `offset` argument.") + offset <- 0 + } + + if(trans == "sqrt"){ + pp.tab$PredictedValue <- (pp.tab$predicted.value)^2 - ifelse(!is.na(offset), offset, 0) + pp.tab$ApproxSE <- 2*abs(pp.tab$std.error)*sqrt(pp.tab$PredictedValue) + if(int.type == "ci"){ + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error + } + if(int.type == "1se"){ + pp.tab$ci <- pp.tab$std.error + } + if(int.type == "2se"){ + pp.tab$ci <- 2*pp.tab$std.error + } + pp.tab$low <- (pp.tab$predicted.value - pp.tab$ci)^2 - ifelse(!is.na(offset), offset, 0) + pp.tab$up <- (pp.tab$predicted.value + pp.tab$ci)^2 - ifelse(!is.na(offset), offset, 0) + } + + if(trans == "log"){ + pp.tab$PredictedValue <- exp(pp.tab$predicted.value) - ifelse(!is.na(offset), offset, 0) + pp.tab$ApproxSE <- abs(pp.tab$std.error)*pp.tab$PredictedValue + if(int.type == "ci"){ + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error + } + if(int.type == "1se"){ + pp.tab$ci <- pp.tab$std.error + } + if(int.type == "2se"){ + pp.tab$ci <- 2*pp.tab$std.error + } + pp.tab$low <- exp(pp.tab$predicted.value - pp.tab$ci) - ifelse(!is.na(offset), offset, 0) + pp.tab$up <- exp(pp.tab$predicted.value + pp.tab$ci) - ifelse(!is.na(offset), offset, 0) + } + + if(trans == "logit"){ + pp.tab$PredictedValue <- exp(pp.tab$predicted.value)/(1 + exp(pp.tab$predicted.value)) + pp.tab$ApproxSE <- pp.tab$PredictedValue * (1 - pp.tab$PredictedValue)* abs(pp.tab$std.error) + if(int.type == "ci"){ + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error + } + if(int.type == "1se"){ + pp.tab$ci <- pp.tab$std.error + } + if(int.type == "2se"){ + pp.tab$ci <- 2*pp.tab$std.error + } + pp.tab$ll <- pp.tab$predicted.value - pp.tab$ci + pp.tab$low <- exp(pp.tab$ll)/(1 + exp(pp.tab$ll)) + pp.tab$uu <- pp.tab$predicted.value + pp.tab$ci + pp.tab$up <- exp(pp.tab$uu)/(1 + exp(pp.tab$uu)) + + pp.tab$ll <- NULL + pp.tab$uu <- NULL + } + + if(trans == "power"){ + pp.tab$PredictedValue <- (pp.tab$predicted.value)^(1/power) - ifelse(!is.na(offset), offset, 0) + pp.tab$ApproxSE <- pp.tab$std.error*(1/(power*pp.tab$PredictedValue^(power-1))) + if(int.type == "ci"){ + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error + } + if(int.type == "1se"){ + pp.tab$ci <- pp.tab$std.error + } + if(int.type == "2se"){ + pp.tab$ci <- 2*pp.tab$std.error + } + pp.tab$low <- (pp.tab$predicted.value - pp.tab$ci)^(1/power) - ifelse(!is.na(offset), offset, 0) + pp.tab$up <- (pp.tab$predicted.value + pp.tab$ci)^(1/power) - ifelse(!is.na(offset), offset, 0) + } + + if(trans == "inverse"){ + pp.tab$PredictedValue <- 1/pp.tab$predicted.value + pp.tab$ApproxSE <- abs(pp.tab$std.error)*pp.tab$PredictedValue^2 + if(int.type == "ci"){ + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error + } + if(int.type == "1se"){ + pp.tab$ci <- pp.tab$std.error + } + if(int.type == "2se"){ + pp.tab$ci <- 2*pp.tab$std.error + } + pp.tab$low <- 1/(pp.tab$predicted.value - pp.tab$ci) + pp.tab$up <- 1/(pp.tab$predicted.value + pp.tab$ci) + } + if (trans == "boxcox") { #include trans="boxcox" for transformations in the form dry_weight^lambda-1)/lambda, I am considering power=lambda for this transformation + pp.tab$PredictedValue <- (power*pp.tab$predicted.value+1)^(1/power) - + ifelse(!is.na(offset), offset, 0) + + pp.tab$ApproxSE <- (power*pp.tab$std.error+1) * (1/(power * + pp.tab$PredictedValue^(power - 1))) #not sure about se + if (int.type == "ci") { + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * + pp.tab$std.error + } + if (int.type == "1se") { + pp.tab$ci <- pp.tab$std.error + } + if (int.type == "2se") { + pp.tab$ci <- 2 * pp.tab$std.error + } + pp.tab$low <- (power*(pp.tab$predicted.value - pp.tab$ci)+1)^(1/power) - + ifelse(!is.na(offset), offset, 0) #not sure about low and up as is returning negative values + pp.tab$up <- (power*(pp.tab$predicted.value + pp.tab$ci)+1)^(1/power) - + ifelse(!is.na(offset), offset, 0) + } + + if (trans == "cloglog") { #include trans="cloglog" link + pp.tab$PredictedValue <- 1-exp(-exp(pp.tab$predicted.value)) + pp.tab$ApproxSE <- abs(pp.tab$std.error) * pp.tab$PredictedValue + if (int.type == "ci") { + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * + pp.tab$std.error + } + if (int.type == "1se") { + pp.tab$ci <- pp.tab$std.error + } + if (int.type == "2se") { + pp.tab$ci <- 2 * pp.tab$std.error + } + pp.tab$low <- 1-exp(-exp(pp.tab$predicted.value - pp.tab$ci)) + pp.tab$up <- 1-exp(-exp(pp.tab$predicted.value + pp.tab$ci)) + } + } + + else { + + if(int.type == "ci"){ + pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error + } + if(int.type == "1se"){ + pp.tab$ci <- pp.tab$std.error + } + if(int.type == "2se"){ + pp.tab$ci <- 2*pp.tab$std.error + } + pp.tab$low <- pp.tab$predicted.value - pp.tab$ci + pp.tab$up <- pp.tab$predicted.value + pp.tab$ci + + } + + pp.tab <- pp.tab[base::order(pp.tab$predicted.value, decreasing = descending),] + + pp.tab$Names <- NULL + trtindex <- max(unlist(lapply(paste0("^", vars, "$"), grep, x = names(pp.tab)))) + + trtnam <- names(pp.tab)[1:trtindex] + # Exclude reserved column names + trtnam <- trtnam[trtnam %!in% c("predicted.value", "std.error", "Df", + "groups", "PredictedValue", "ApproxSE", "ci", "low", "up")] + + for(i in seq_along(trtnam)){ + pp.tab[[trtnam[i]]] <- factor(pp.tab[[trtnam[i]]], levels = unique(pp.tab[[trtnam[i]]])) + } + + # rounding to the correct number of decimal places + pp.tab <- rapply(object = pp.tab, f = round, classes = "numeric", how = "replace", digits = decimals) + + if(save) { + write.csv(pp.tab, file = paste0(savename, ".csv"), row.names = FALSE) + } + + # If there are brackets in the label, grab the text from inside + if(is.call(ylab)) { + ylab <- as.character(ylab)[2] + } + attr(pp.tab, "ylab") <- ylab + + if(length(vars)>2) { + classify3 <- vars[3] + } + else if(length(vars) > 1) { + classify2 <- vars[2] + classify <- vars[1] + } + + class(pp.tab) <- c("mct", class(pp.tab)) + if(plot) { + print(autoplot(pp.tab)) + } + + if(exists("aliased_names")) { + attr(pp.tab, 'aliased') <- as.character(aliased_names) + } + + # Add the critical value as an attribute + if(stats::var(as.vector(crit.val), na.rm = TRUE) < 1e-10) { + attr(pp.tab, 'HSD') <- crit.val[1,2] + } + else { + attr(pp.tab, 'HSD') <- crit.val + } + + rownames(pp.tab) <- NULL + + return(pp.tab) +} + + +#' Print output of multiple_comparisons +#' +#' @param x An mct object to print to the console. +#' @param ... Other arguments +#' +#' @return The original object invisibly. +#' @seealso [multiple_comparisons()] +#' @method print mct +#' @export +#' @examples +#' dat.aov <- aov(Petal.Width ~ Species, data = iris) +#' output <- multiple_comparisons(dat.aov, classify = "Species") +#' print(output) +print.mct <- function(x, ...) { + stopifnot(inherits(x, "mct")) + print.data.frame(x, ...) + + if(!is.null(attr(x, "aliased"))) { + aliased <- attr(x, "aliased") + if(length(aliased) > 1) { + cat("\nAliased levels are:", paste(aliased[1:(length(aliased)-1)], collapse = ", "), "and", aliased[length(aliased)], "\n") + } + else { + cat("\nAliased level is:", aliased, "\n") + } + } + invisible(x) +} +