diff --git a/NEWS.md b/NEWS.md index 61b923a..b1f0529 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,9 @@ ## Bugfixes & changes * Fix `pca()` for `LogRatio` objects (arguments were not passed to the internal method). +## Internals +* Store groups as `factor` instead of `character`. + ## Breaking changes * `[` always returns a `CompositionMatrix` object by default, even if only one row/column is accessed. * Rename `get_totals()` and `set_totals()` to `totals()`. diff --git a/R/AllClasses.R b/R/AllClasses.R index 772833c..72c26c8 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -46,7 +46,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) #' An S4 class to represent compositional data. #' @slot totals A [`numeric`] vector to store the absolute row sums (before #' the closure of the compositions). -#' @slot groups A [`character`] vector to store the group names. +#' @slot groups A [`factor`] vector to store the group names. #' @section Coerce: #' In the code snippets below, `x` is a `CompositionMatrix` object. #' \describe{ @@ -70,7 +70,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) Class = "CompositionMatrix", slots = c( totals = "numeric", - groups = "character" + groups = "factor" ), contains = c("NumericMatrix") ) @@ -81,7 +81,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) #' S4 classes to represent log-ratio data transformations. #' @slot totals A [`numeric`] vector to store the absolute row sums (before #' the closure of the compositions). -#' @slot groups A [`character`] vector to store the group names. +#' @slot groups A [`factor`] vector to store the group names. #' @slot parts A [`character`] vector to store the original part names. #' @slot ratio A [`character`] vector to store the ratio names. #' @slot order An [`integer`] vector to store the original ordering of the @@ -107,7 +107,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) Class = "LogRatio", slots = c( totals = "numeric", - groups = "character", + groups = "factor", parts = "character", ratio = "character", @@ -158,7 +158,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) #' #' An S4 class to store the result of outlier detection. #' @slot samples A [`character`] vector to store the sample identifiers. -#' @slot groups A [`character`] vector to store the group names. +#' @slot groups A [`factor`] vector to store the group names. #' @slot standard A [`numeric`] matrix giving the standard squared Mahalanobis #' distances. #' @slot robust A [`numeric`] matrix giving the robust squared Mahalanobis @@ -180,7 +180,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) Class = "OutlierIndex", slots = c( samples = "character", - groups = "character", + groups = "factor", standard = "numeric", robust = "numeric", limit = "numeric", diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 56b20a7..82f8e03 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -753,8 +753,6 @@ NULL #' @param by A `vector` or a list of grouping elements, each as long as the #' variables in `x`. The elements are coerced to factors before use #' (in the sense that [`interaction(by)`][interaction()] defines the grouping). -#' @param drop A [`logical`] scalar indicating whether to drop unused -#' combinations of grouping values. #' @param ... Further arguments to be passed to [mean()]. #' @return A [`CompositionMatrix-class`] object. #' @seealso [mean()], [aggregate()] @@ -765,8 +763,8 @@ NULL #' @aliases condense-method setGeneric( name = "condense", - def = function(x, ...) standardGeneric("condense"), - valueClass = "CompositionMatrix" + def = function(x, ...) standardGeneric("condense") + # valueClass = "CompositionMatrix" ) #' Marginal Compositions diff --git a/R/aggregate.R b/R/aggregate.R index 9ba230c..b3a697e 100644 --- a/R/aggregate.R +++ b/R/aggregate.R @@ -9,18 +9,14 @@ aggregate.CompositionMatrix <- function(x, by, FUN, ..., simplify = TRUE, drop = TRUE) { m <- nrow(x) - ## Validation - if (!is.list(by)) by <- list(by) - arkhe::assert_lengths(by, m) - ## Grouping - index <- interaction(by, drop = drop, sep = "_") - if (length(unique(index)) == m) { + index <- as_groups(by) + if (nlevels(index) == 0 || nlevels(index) == m) { warning("Nothing to group by.", call. = FALSE) return(x) } - m <- tapply( + aggr <- tapply( X = seq_len(m), INDEX = index, FUN = function(i, data, fun, ...) fun(data[i, , drop = FALSE], ...), @@ -31,13 +27,13 @@ aggregate.CompositionMatrix <- function(x, by, FUN, ..., ) has_dim <- vapply( - X = m, + X = aggr, FUN = function(x) !is.null(nrow(x)) && nrow(x) > 1, FUN.VALUE = logical(1) ) - if (any(has_dim) || !simplify) return(m) - do.call(rbind, m) + if (any(has_dim) || !simplify) return(aggr) + do.call(rbind, aggr) } #' @export diff --git a/R/barplot.R b/R/barplot.R index 0a084ee..9f51465 100644 --- a/R/barplot.R +++ b/R/barplot.R @@ -11,7 +11,7 @@ barplot.CompositionMatrix <- function(height, ..., select = NULL, decreasing = TRUE, space = 0.2, offset = 0.025, color = palette_color_discrete(), - border = "black", axes = TRUE, legend = TRUE) { + border = NA, axes = TRUE, legend = TRUE) { ## Get data if (is.null(select)) select <- seq_len(ncol(height)) z <- height[, select, drop = FALSE] @@ -107,11 +107,11 @@ prepare_barplot <- function(x, groups = NULL, decreasing = TRUE, offset = 0.025) { ## Validation stopifnot(methods::is(x, "CompositionMatrix")) - if (!has_groups(groups)) groups <- rep("", nrow(x)) + if (!has_groups(groups)) groups <- rep(NA, nrow(x)) arkhe::assert_length(groups, nrow(x)) ## Row order - grp <- factor(groups, exclude = NULL) + grp <- as_groups(groups, drop_na = FALSE) spl <- lapply( X = split(x = x, f = grp), FUN = function(x, order, decrease) { diff --git a/R/coerce.R b/R/coerce.R index 0f6d033..6dd7121 100644 --- a/R/coerce.R +++ b/R/coerce.R @@ -31,7 +31,7 @@ setMethod( totals <- rowSums(from, na.rm = TRUE) from <- from / totals - grp <- rep(NA_character_, nrow(from)) + grp <- as_groups(rep(NA, nrow(from))) .CompositionMatrix(from, totals = unname(totals), groups = grp) } ) @@ -51,11 +51,8 @@ setMethod( ## Group names grp <- rep(NA_character_, nrow(from)) - if (!is.null(groups)) { - grp <- from[, groups, drop = FALSE] - grp <- as.character(interaction(grp, sep = "_")) - grp[grp == ""] <- NA_character_ - } + if (!is.null(groups)) grp <- from[, groups, drop = FALSE] + grp <- as_groups(grp) ## Remove non-numeric columns if (is.null(parts)) { diff --git a/R/condense.R b/R/condense.R index 94f385d..9e1e12d 100644 --- a/R/condense.R +++ b/R/condense.R @@ -8,16 +8,12 @@ NULL setMethod( f = "condense", signature = c("CompositionMatrix"), - definition = function(x, by = groups(x), drop = TRUE, ...) { + definition = function(x, by = groups(x), ...) { m <- nrow(x) - ## Validation - if (!is.list(by)) by <- list(by) - arkhe::assert_lengths(by, m) - ## Grouping - index <- interaction(by, drop = drop, sep = "_") - if (length(unique(index)) == m) { + index <- as_groups(by) + if (nlevels(index) == 0 || nlevels(index) == m) { warning("Nothing to group by.", call. = FALSE) return(x) } @@ -36,10 +32,11 @@ setMethod( tot <- tapply(X = totals(x), INDEX = index, FUN = mean, simplify = TRUE) grp <- groups(x) + if (has_groups(grp)) grp <- flatten_chr(x = grp, by = index) - else grp <- rep(NA_character_, length(tot)) + else grp <- rep(NA, length(tot)) rownames(z) <- levels(index) - .CompositionMatrix(z, totals = as.numeric(tot), groups = grp) + .CompositionMatrix(z, totals = as.numeric(tot), groups = as_groups(grp)) } ) diff --git a/R/group.R b/R/group.R index ed0b1cc..0322060 100644 --- a/R/group.R +++ b/R/group.R @@ -26,6 +26,24 @@ setMethod( ) # Groups ======================================================================= +as_groups <- function(x, drop_levels = TRUE, drop_na = TRUE) { + if (!is.factor(x)) { + if (!is.list(x)) x <- list(x) + x <- rapply( + object = x, + f = function(x) { + x[x == ""] <- NA + x + }, + classes = "character", + how = "replace" + ) + x <- interaction(x, sep = "_") + } + if (drop_levels) x <- droplevels(x) + if (!drop_na) x <- addNA(x, ifany = TRUE) + x +} has_groups <- function(x) { length(x) > 0 && any(in_groups(x)) } @@ -78,13 +96,9 @@ setMethod( f = "groups<-", signature = c(object = "CompositionMatrix", value = "ANY"), definition = function(object, value) { - if (is.null(value)) { - object@groups <- rep(NA_character_, nrow(object)) - } else { - value <- as.character(value) - value[value == ""] <- NA_character_ - object@groups <- value - } + if (is.null(value)) value <- rep(NA_character_, nrow(object)) + value <- as_groups(value) + object@groups <- value methods::validObject(object) object } @@ -97,9 +111,7 @@ setMethod( f = "groups<-", signature = c(object = "CompositionMatrix", value = "list"), definition = function(object, value) { - value <- interaction(value, sep = "_") - value <- as.character(value) - value[value == ""] <- NA_character_ + value <- as_groups(value) object@groups <- value methods::validObject(object) object diff --git a/R/pca.R b/R/pca.R index 20ae0fa..c350807 100644 --- a/R/pca.R +++ b/R/pca.R @@ -31,7 +31,7 @@ setMethod( x <- methods::callNextMethod(object = object, center = center, scale = scale, rank = rank, sup_row = sup_row, sup_col = sup_col, weight_row = weight_row, weight_col = weight_col) - if (any_assigned(object)) x@rows@groups <- groups(object) + if (any_assigned(object)) x@rows@groups <- as.character(groups(object)) x } ) diff --git a/inst/tinytest/_snaps/coerce.rds b/inst/tinytest/_snaps/coerce.rds index a8a188e..b3de970 100644 Binary files a/inst/tinytest/_snaps/coerce.rds and b/inst/tinytest/_snaps/coerce.rds differ diff --git a/inst/tinytest/_snaps/margin.rds b/inst/tinytest/_snaps/margin.rds index 8d4b9b6..1ffb382 100644 Binary files a/inst/tinytest/_snaps/margin.rds and b/inst/tinytest/_snaps/margin.rds differ diff --git a/inst/tinytest/_snaps/missing_multiplicative.rds b/inst/tinytest/_snaps/missing_multiplicative.rds index 450b8ef..5de38ed 100644 Binary files a/inst/tinytest/_snaps/missing_multiplicative.rds and b/inst/tinytest/_snaps/missing_multiplicative.rds differ diff --git a/inst/tinytest/_snaps/scale.rds b/inst/tinytest/_snaps/scale.rds index 2e415a9..b8df618 100644 Binary files a/inst/tinytest/_snaps/scale.rds and b/inst/tinytest/_snaps/scale.rds differ diff --git a/inst/tinytest/_snaps/transform_alr.rds b/inst/tinytest/_snaps/transform_alr.rds index 9902477..ad099bc 100644 Binary files a/inst/tinytest/_snaps/transform_alr.rds and b/inst/tinytest/_snaps/transform_alr.rds differ diff --git a/inst/tinytest/_snaps/transform_clr.rds b/inst/tinytest/_snaps/transform_clr.rds index 386d0af..d0331e6 100644 Binary files a/inst/tinytest/_snaps/transform_clr.rds and b/inst/tinytest/_snaps/transform_clr.rds differ diff --git a/inst/tinytest/_snaps/transform_ilr.rds b/inst/tinytest/_snaps/transform_ilr.rds index 034e534..0351138 100644 Binary files a/inst/tinytest/_snaps/transform_ilr.rds and b/inst/tinytest/_snaps/transform_ilr.rds differ diff --git a/inst/tinytest/_snaps/transform_lr.rds b/inst/tinytest/_snaps/transform_lr.rds index 3825899..f6d9b5d 100644 Binary files a/inst/tinytest/_snaps/transform_lr.rds and b/inst/tinytest/_snaps/transform_lr.rds differ diff --git a/inst/tinytest/_snaps/transform_plr.rds b/inst/tinytest/_snaps/transform_plr.rds index eb42f65..72e9a52 100644 Binary files a/inst/tinytest/_snaps/transform_plr.rds and b/inst/tinytest/_snaps/transform_plr.rds differ diff --git a/inst/tinytest/_snaps/zero_multiplicative.rds b/inst/tinytest/_snaps/zero_multiplicative.rds index f8c189e..b9e58bb 100644 Binary files a/inst/tinytest/_snaps/zero_multiplicative.rds and b/inst/tinytest/_snaps/zero_multiplicative.rds differ diff --git a/inst/tinytest/test_mutators.R b/inst/tinytest/test_mutators.R index 8adadf8..a2967fd 100644 --- a/inst/tinytest/test_mutators.R +++ b/inst/tinytest/test_mutators.R @@ -2,11 +2,11 @@ data("hongite") coda <- as_composition(hongite) -expect_equal(groups(coda), rep(NA_character_, nrow(coda))) +expect_equal(groups(coda), factor(rep(NA_character_, nrow(coda)))) expect_false(any_assigned(coda)) groups(coda) <- rep(c("A", "B", "C", "D", NA), each = 5) -expect_equal(groups(coda), rep(c("A", "B", "C", "D", NA), each = 5)) +expect_equal(groups(coda), factor(rep(c("A", "B", "C", "D", NA), each = 5))) expect_true(any_assigned(coda)) expect_equal(is_assigned(coda), rep(c(TRUE, FALSE), c(20, 5))) diff --git a/inst/tinytest/test_plot.R b/inst/tinytest/test_plot.R index 89424aa..255b23c 100644 --- a/inst/tinytest/test_plot.R +++ b/inst/tinytest/test_plot.R @@ -18,16 +18,16 @@ if (at_home()) { expect_snapshot_plot(plot_hist, "plot_hist") # Barplot ==================================================================== - plot_barplot <- function() barplot(coda, order_columns = FALSE) + plot_barplot <- function() barplot(coda, order_columns = FALSE, border = "black") expect_snapshot_plot(plot_barplot, "plot_barplot") - plot_barplot_order <- function() barplot(coda, order_columns = TRUE) + plot_barplot_order <- function() barplot(coda, order_columns = TRUE, border = "black") expect_snapshot_plot(plot_barplot_order, "plot_barplot_order_columns") - plot_barplot_order <- function() barplot(coda, order_rows = 2) + plot_barplot_order <- function() barplot(coda, order_rows = 2, border = "black") expect_snapshot_plot(plot_barplot_order, "plot_barplot_order_rows") - plot_barplot_group <- function() barplot(coda, by = rep(1:5, 5), order_columns = TRUE) + plot_barplot_group <- function() barplot(coda, by = rep(1:5, 5), order_columns = TRUE, border = "black") expect_snapshot_plot(plot_barplot_group, "plot_barplot_group") # Density ==================================================================== diff --git a/man/CompositionMatrix-class.Rd b/man/CompositionMatrix-class.Rd index 7764403..472d157 100644 --- a/man/CompositionMatrix-class.Rd +++ b/man/CompositionMatrix-class.Rd @@ -14,7 +14,7 @@ An S4 class to represent compositional data. \item{\code{totals}}{A \code{\link{numeric}} vector to store the absolute row sums (before the closure of the compositions).} -\item{\code{groups}}{A \code{\link{character}} vector to store the group names.} +\item{\code{groups}}{A \code{\link{factor}} vector to store the group names.} }} \note{ diff --git a/man/LogRatio-class.Rd b/man/LogRatio-class.Rd index 65835c3..f1b5588 100644 --- a/man/LogRatio-class.Rd +++ b/man/LogRatio-class.Rd @@ -24,7 +24,7 @@ S4 classes to represent log-ratio data transformations. \item{\code{totals}}{A \code{\link{numeric}} vector to store the absolute row sums (before the closure of the compositions).} -\item{\code{groups}}{A \code{\link{character}} vector to store the group names.} +\item{\code{groups}}{A \code{\link{factor}} vector to store the group names.} \item{\code{parts}}{A \code{\link{character}} vector to store the original part names.} diff --git a/man/OutlierIndex-class.Rd b/man/OutlierIndex-class.Rd index c080205..1a371ac 100644 --- a/man/OutlierIndex-class.Rd +++ b/man/OutlierIndex-class.Rd @@ -13,7 +13,7 @@ An S4 class to store the result of outlier detection. \describe{ \item{\code{samples}}{A \code{\link{character}} vector to store the sample identifiers.} -\item{\code{groups}}{A \code{\link{character}} vector to store the group names.} +\item{\code{groups}}{A \code{\link{factor}} vector to store the group names.} \item{\code{standard}}{A \code{\link{numeric}} matrix giving the standard squared Mahalanobis distances.} diff --git a/man/barplot.Rd b/man/barplot.Rd index 0b6f56c..8af1a72 100644 --- a/man/barplot.Rd +++ b/man/barplot.Rd @@ -17,7 +17,7 @@ space = 0.2, offset = 0.025, color = palette_color_discrete(), - border = "black", + border = NA, axes = TRUE, legend = TRUE ) diff --git a/man/condense.Rd b/man/condense.Rd index 1b1b927..d42399c 100644 --- a/man/condense.Rd +++ b/man/condense.Rd @@ -9,7 +9,7 @@ \usage{ condense(x, ...) -\S4method{condense}{CompositionMatrix}(x, by = groups(x), drop = TRUE, ...) +\S4method{condense}{CompositionMatrix}(x, by = groups(x), ...) } \arguments{ \item{x}{A \code{\linkS4class{CompositionMatrix}} object.} @@ -19,9 +19,6 @@ condense(x, ...) \item{by}{A \code{vector} or a list of grouping elements, each as long as the variables in \code{x}. The elements are coerced to factors before use (in the sense that \code{\link[=interaction]{interaction(by)}} defines the grouping).} - -\item{drop}{A \code{\link{logical}} scalar indicating whether to drop unused -combinations of grouping values.} } \value{ A \code{\linkS4class{CompositionMatrix}} object.