Skip to content

Commit

Permalink
Rewrite plot() method for OutlierIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
nfrerebeau committed Jun 14, 2024
1 parent a60d930 commit 7e6b069
Show file tree
Hide file tree
Showing 18 changed files with 271 additions and 415 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

## Breaking changes
* `[` always returns a `CompositionMatrix` object by default, even if only one row/column is accessed.
* Rewrite `plot()` method for `OutlierIndex` object.

# nexus 0.2.0
## New classes and methods
Expand Down
18 changes: 9 additions & 9 deletions R/AllClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,21 +163,21 @@ setClassUnion("index", members = c("logical", "numeric", "character"))
#' Outliers
#'
#' An S4 class to store the result of outlier detection.
#' @slot .Data A [`logical`] matrix giving the squared Mahalanobis distance.
#' @slot samples A [`character`] vector to store the sample identifiers
#' (allows duplicates in case of repeated measurements).
#' @slot groups A [`character`] vector to store the group names.
#' @slot groups A [`list`] giving the samples index of each group.
#' @slot standard A [`numeric`] matrix giving the standard squared Mahalanobis
#' distances.
#' @slot robust A [`numeric`] matrix giving the robust squared Mahalanobis
#' distances.
#' @slot limit A [`numeric`] value giving the cut-off value used for outliers
#' detection (quantile of the Chi-squared distribution).
#' @slot robust An [`logical`] scalar: were robust estimators used?
#' @slot dof A (non-negative) [`numeric`] value giving the degrees of freedom.
#' @section Coerce:
#' In the code snippets below, `x` is an `OutlierIndex` object.
#' \describe{
#' \item{`as.data.frame(x)`}{Coerces to a [`data.frame`].}
#' }
#' @note
#' This class inherits from [`logical`].
#' @author N. Frerebeau
#' @family classes
#' @docType class
Expand All @@ -187,11 +187,11 @@ setClassUnion("index", members = c("logical", "numeric", "character"))
Class = "OutlierIndex",
slots = c(
samples = "character",
groups = "character",
groups = "list",

standard = "matrix",
robust = "matrix",
limit = "numeric",
robust = "logical",
dof = "integer"
),
contains = "matrix"
)
)
21 changes: 13 additions & 8 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -1297,15 +1297,20 @@ setGeneric(
#' Plot Outliers
#'
#' @param x An [`OutlierIndex-class`] object.
#' @param select A length-one vector giving the group to be plotted.
#' @param type A [`character`] string specifying the type of plot that should be
#' made. It must be one of "`dotchart`" or "`qqplot`". Any unambiguous
#' substring can be given.
#' @param pch.in,pch.out A symbol specification for non-outliers and outliers
#' (resp.).
#' @param flip A [`logical`] scalar: should the y-axis (ticks and numbering) be
#' flipped from side 2 (left) to 4 (right) from group to group?
#' @param ncol An [`integer`] specifying the number of columns to use.
#' Defaults to 1 for up to 4 groups, otherwise to 2.
#' made. It must be one of "`dotchart`", "`distance`" or "`qqplot`".
#' Any unambiguous substring can be given.
#' @param robust A [`logical`] scalar: should robust Mahalanobis distances be
#' displayed? Only used if `type` is "`dotchart`" or "`qqplot`".
#' @param pch A lenth-three vector of symbol specification for non-outliers and
#' outliers (resp.).
#' @param xlim A length-two [`numeric`] vector giving the x limits of the plot.
#' The default value, `NULL`, indicates that the range of the
#' [finite][is.finite()] values to be plotted should be used.
#' @param ylim A length-two [`numeric`] vector giving the y limits of the plot.
#' The default value, `NULL`, indicates that the range of the
#' [finite][is.finite()] values to be plotted should be used.
#' @param xlab,ylab A [`character`] vector giving the x and y axis labels.
#' @param main A [`character`] string giving a main title for the plot.
#' @param sub A [`character`] string giving a subtitle for the plot.
Expand Down
17 changes: 1 addition & 16 deletions R/coerce.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,21 +157,6 @@ setMethod(
}
)

#' @export
#' @rdname as_features
#' @aliases as_features,OutlierIndex-method
setMethod(
f = "as_features",
signature = c(from = "OutlierIndex"),
definition = function(from) {
data.frame(
sample = get_samples(from),
group = get_groups(from),
from
)
}
)

# To data.frame ================================================================
#' @method as.data.frame CompositionMatrix
#' @export
Expand All @@ -188,5 +173,5 @@ as.data.frame.LogRatio <- function(x, ...) {
#' @method as.data.frame OutlierIndex
#' @export
as.data.frame.OutlierIndex <- function(x, ...) {
as.data.frame(methods::as(x, "matrix"), row.names = rownames(x))
as.data.frame(x@standard, row.names = rownames(x))
}
10 changes: 0 additions & 10 deletions R/mutators.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@ setMethod("is_assigned", "CompositionMatrix", function(x) !is.na(get_groups(x)))
#' @aliases is_assigned,LogRatio-method
setMethod("is_assigned", "LogRatio", function(x) !is.na(get_groups(x)))

#' @export
#' @rdname groups
#' @aliases is_assigned,OutlierIndex-method
setMethod("is_assigned", "OutlierIndex", function(x) !is.na(get_groups(x)))

#' @export
#' @rdname groups
#' @aliases any_assigned,CompositionMatrix-method
Expand All @@ -52,11 +47,6 @@ setMethod("any_assigned", "CompositionMatrix", function(x) any(is_assigned(x)))
#' @aliases any_assigned,LogRatio-method
setMethod("any_assigned", "LogRatio", function(x) any(is_assigned(x)))

#' @export
#' @rdname groups
#' @aliases any_assigned,OutlierIndex-method
setMethod("any_assigned", "OutlierIndex", function(x) any(is_assigned(x)))

#' @export
#' @rdname groups
#' @aliases get_groups,CompositionMatrix-method
Expand Down
161 changes: 0 additions & 161 deletions R/nexus-internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,164 +24,3 @@ label_percent <- function(x, digits = NULL, trim = FALSE) {
x[i] <- y
x
}

#' Single Panel Plot
#'
#' @param x,y A [`numeric`] vector.
#' @param name A [`character`] string.
#' @param panel A [`function`] in the form `function(x, y, name, ...)`
#' which gives the action to be carried out in each panel of the display.
#' @param asp A length-one [`numeric`] vector giving the aspect ratio y/x
#' (see [graphics::plot.window()]).
#' @param xlim,ylim A length-two [`numeric`] vector specifying the the x and y
#' limits.
#' @param main A [`character`] string giving a main title for the plot.
#' @param sub A [`character`] string giving a subtitle for the plot.
#' @param ann A [`logical`] scalar: should the default annotation (title and x
#' and y axis labels) appear on the plot?
#' @param axes A [`logical`] scalar: should axes be drawn on the plot?
#' @param frame.plot A [`logical`] scalar: should a box be drawn around the
#' plot?
#' @param panel.first An an `expression` to be evaluated after the plot axes are
#' set up but before any plotting takes place. This can be useful for drawing
#' background grids.
#' @param panel.last An `expression` to be evaluated after plotting has taken
#' place but before the axes, title and box are added.
#' @param ... Further parameters to be passed to `panel`
#' (e.g. [graphical parameters][graphics::par]).
#' @return
#' Called for its side-effects: it results in a graphic being displayed.
#' Invisibly returns a list with elements `x` and `y`.
#' @keywords internal
#' @noRd
.plot_single <- function(x, y, name, panel, ..., asp = NA,
xlim = NULL, ylim = NULL,
xlab = NULL, ylab = NULL,
main = NULL, sub = NULL,
ann = graphics::par("ann"),
axes = TRUE, frame.plot = axes,
panel.first = NULL, panel.last = NULL) {
## Open new window
grDevices::dev.hold()
on.exit(grDevices::dev.flush(), add = TRUE)
graphics::plot.new()

## Set plotting coordinates
xlim <- xlim %||% range(x, finite = TRUE)
ylim <- ylim %||% range(y, finite = TRUE)
graphics::plot.window(xlim = xlim, ylim = ylim, asp = asp)

## Evaluate pre-plot expressions
panel.first

## Plot
panel(x, y, name, ...)

## Evaluate post-plot and pre-axis expressions
panel.last

## Construct Axis
if (axes) {
graphics::axis(side = 1, las = 1)
graphics::axis(side = 2, las = 1)
}

## Plot frame
if (frame.plot) {
graphics::box()
}

## Add annotation
if (ann) {
graphics::title(main = main, sub = sub, xlab = xlab, ylab = ylab)
}

invisible(list(x, y))
}

#' Multiple Panels Plot
#'
#' @param x A [`numeric`] vector.
#' @param y A [`numeric`] matrix.
#' @param panel A [`function`] in the form `function(x, y, name, ...)`
#' which gives the action to be carried out in each panel of the display
#' (see [.plot_single()]).
#' @param y_flip A [`logical`] scalar: should the y-axis (ticks and numbering)
#' be flipped from side 2 (left) to 4 (right) from series to series?
#' @param ncol An [`integer`] specifying the number of columns to use.
#' Defaults to 1 for up to 4 series, otherwise to 2.
#' @inheritParams .plot_single
#' @return
#' Called for its side-effects: it results in a graphic being displayed.
#' Invisibly returns `x`.
#' @keywords internal
#' @noRd
.plot_multiple <- function(x, y, panel, ..., asp = NA,
y_flip = TRUE, n_col = NULL,
xlim = NULL, ylim = NULL,
xlab = NULL, ylab = NULL,
main = NULL, sub = NULL,
ann = graphics::par("ann"),
axes = TRUE, frame.plot = axes,
panel.first = NULL, panel.last = NULL) {
m <- ncol(y)
m_seq <- seq_len(m)
if (is.null(n_col)) n_col <- if (m > 4) 2 else 1
n_row <- ceiling(m / n_col)

## Graphical parameters
## Save and restore
old_par <- graphics::par(
mar = c(0, 5.1, 0, if (y_flip) 5.1 else 2.1),
oma = c(6, 0, 5, 0),
mfcol = c(n_row, n_col)
)
on.exit(graphics::par(old_par))

dots <- list(...)
cex.lab <- dots$cex.lab %||% graphics::par("cex.lab")
col.lab <- dots$col.lab %||% graphics::par("col.lab")
font.lab <- dots$font.lab %||% graphics::par("font.lab")
cex.main <- dots$cex.main %||% graphics::par("cex.main")
font.main <- dots$font.main %||% graphics::par("font.main")
col.main <- dots$col.main %||% graphics::par("col.main")

for (j in m_seq) {
## Plot
yj <- y[, j, drop = TRUE]
.plot_single(x = x, y = yj, name = colnames(y)[[j]], panel = panel,
asp = asp, xlim = xlim, ylim = ylim,
main = NULL, sub = NULL, ann = FALSE, axes = FALSE,
frame.plot = frame.plot,
panel.first = panel.first, panel.last = panel.last, ...)

## Construct Axis
do_x <- (j %% n_row == 0 || j == m)
y_side <- if (j %% 2 || !y_flip) 2 else 4
if (axes) {
if (do_x) {
graphics::axis(side = 1, xpd = NA, las = 1)
}
graphics::axis(side = y_side, xpd = NA, las = 1)
}

## Add annotation
if (ann) {
if (do_x) {
graphics::mtext(xlab, side = 1, line = 3,
cex = cex.lab, col = col.lab, font = font.lab)
}
graphics::mtext(ylab, side = y_side, line = 3,
cex = cex.lab, col = col.lab, font = font.lab)
}
}

## Add annotation
if (ann) {
graphics::par(mfcol = c(1, 1))
graphics::mtext(main, side = 3, line = 3,
cex = cex.main, font = font.main, col = col.main)
}

invisible(x)
}
51 changes: 29 additions & 22 deletions R/outliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ NULL
setMethod(
f = "outliers",
signature = c(object = "CompositionMatrix"),
definition = function(object, ..., groups = get_groups(object), robust = TRUE,
definition = function(object, ..., groups = get_groups(object),
method = c("mve", "mcd"), quantile = 0.975) {
## Transformation
z <- transform_ilr(object)
Expand All @@ -19,53 +19,60 @@ setMethod(
p <- ncol(z)
if (is.null(groups) || all(is.na(groups))) {
grp <- list(z)
groups <- rep(NA_character_, m)
groups <- list(seq_len(m))
} else {
grp <- split(z, f = groups)
groups <- split(seq_len(m), f = groups)
}

## Clean
size <- vapply(X = grp, FUN = nrow, FUN.VALUE = integer(1), USE.NAMES = TRUE)
too_small <- size < p
too_small <- size < (p + 1)
very_small <- size < (2 * p)
if (all(too_small)) {
stop("Too few samples.", call. = FALSE)
}
if (any(too_small)) {
msg <- "%s is ignored: sample size is too small (%d).\n"
warning(sprintf(msg, names(grp)[too_small], size[too_small]), call. = FALSE)
}
if (all(too_small)) {
stop("Too few samples.", call. = FALSE)
if (any(very_small)) {
msg <- "%s: possibly too small sample size (%d).\n"
warning(sprintf(msg, names(grp)[very_small], size[very_small]), call. = FALSE)
}
grp <- grp[!too_small]

d2 <- matrix(data = NA_real_, nrow = m, ncol = length(grp))
dimnames(d2) <- list(rownames(object), names(grp))
dc <- dr <- matrix(data = NA_real_, nrow = m, ncol = length(grp),
dimnames = list(rownames(object), names(grp)))

## For each group...
for (i in seq_along(grp)) {
for (i in seq_along(grp)[!too_small]) {
## ...subset
tmp <- grp[[i]]

## ...compute center and spread
if (!robust) method <- "classical" # Standard estimators
else method <- match.arg(method, several.ok = FALSE) # Robust estimators
est <- MASS::cov.rob(tmp, method = method, ...)
## Standard estimators
estc <- list(center = colMeans(tmp, na.rm = TRUE), cov = cov(tmp))

## Robust estimators
method <- match.arg(method, several.ok = FALSE)
estr <- MASS::cov.rob(tmp, method = method, ...)

## Mahalanobis distance
d2[, i] <- stats::mahalanobis(z, center = est$center, cov = est$cov)
## ...Mahalanobis distance
dc[, i] <- stats::mahalanobis(z, center = estc$center, cov = estc$cov)
dr[, i] <- stats::mahalanobis(z, center = estr$center, cov = estr$cov)
}

## Threshold
dof <- p - 1L
limit <- sqrt(stats::qchisq(p = quantile, df = dof))

d <- sqrt(d2)
limit <- sqrt(stats::qchisq(p = quantile, df = p))

.OutlierIndex(
d,
samples = get_samples(object),
groups = as.character(groups),
groups = groups,

standard = sqrt(dc),
robust = sqrt(dr),
limit = limit,
robust = robust,
dof = dof
dof = p
)
}
)
Loading

0 comments on commit 7e6b069

Please sign in to comment.