diff --git a/DESCRIPTION b/DESCRIPTION index 978cbf725c..e58b6462d1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -242,6 +242,7 @@ Collate: 'scales-.R' 'stat-align.R' 'stat-bin.R' + 'stat-summary-2d.R' 'stat-bin2d.R' 'stat-bindot.R' 'stat-binhex.R' @@ -263,7 +264,6 @@ Collate: 'stat-smooth-methods.R' 'stat-smooth.R' 'stat-sum.R' - 'stat-summary-2d.R' 'stat-summary-bin.R' 'stat-summary-hex.R' 'stat-summary.R' diff --git a/NEWS.md b/NEWS.md index e19471d2e2..4f766433ed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # ggplot2 (development version) +* All binning stats now use the `boundary`/`center` parametrisation rather + than `origin`, following in `stat_bin()`'s footsteps (@teunbrand). +* `stat_summary_2d()` and `stat_bin_2d()` now deal with zero-range data + more elegantly (@teunbrand, #6207). * `geom_ribbon()` now appropriately warns about, and removes, missing values (@teunbrand, #6243). * `guide_*()` can now accept two inside legend theme elements: @@ -232,7 +236,7 @@ * The ellipsis argument is now checked in `fortify()`, `get_alt_text()`, `labs()` and several guides (@teunbrand, #3196). * `stat_summary_bin()` no longer ignores `width` parameter (@teunbrand, #4647). -* Added `keep.zeroes` argument to `stat_bin()` (@teunbrand, #3449) +* Reintroduced `drop` argument to `stat_bin()` (@teunbrand, #3449) * (internal) removed barriers for using 2D structures as aesthetics (@teunbrand, #4189). * `coord_sf()` no longer errors when dealing with empty graticules (@teunbrand, #6052) diff --git a/R/bin.R b/R/bin.R index 055721f0e4..f45fe90090 100644 --- a/R/bin.R +++ b/R/bin.R @@ -54,19 +54,12 @@ bin_breaks <- function(breaks, closed = c("right", "left")) { bin_breaks_width <- function(x_range, width = NULL, center = NULL, boundary = NULL, closed = c("right", "left")) { - check_length(x_range, 2L) - # binwidth seems to be the argument name supplied to width. 
(stat-bin and stat-bindot) - check_number_decimal(width, min = 0, allow_infinite = FALSE, arg = "binwidth") - - if (!is.null(boundary) && !is.null(center)) { - cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified.") - } else if (is.null(boundary)) { + if (is.null(boundary)) { if (is.null(center)) { # If neither edge nor center given, compute both using tile layer's # algorithm. This puts min and max of data in outer half of their bins. boundary <- width / 2 - } else { # If center given but not boundary, compute boundary. boundary <- center - width / 2 @@ -75,9 +68,6 @@ bin_breaks_width <- function(x_range, width = NULL, center = NULL, # Find the left side of left-most bin: inputs could be Dates or POSIXct, so # coerce to numeric first. - x_range <- as.numeric(x_range) - width <- as.numeric(width) - boundary <- as.numeric(boundary) shift <- floor((x_range[1] - boundary) / width) origin <- boundary + shift * width @@ -104,9 +94,7 @@ bin_breaks_width <- function(x_range, width = NULL, center = NULL, bin_breaks_bins <- function(x_range, bins = 30, center = NULL, boundary = NULL, closed = c("right", "left")) { - check_length(x_range, 2L) - check_number_whole(bins, min = 1) if (zero_range(x_range)) { # 0.1 is the same width as the expansion `default_expansion()` gives for 0-width data width <- 0.1 @@ -128,6 +116,56 @@ bin_breaks_bins <- function(x_range, bins = 30, center = NULL, # Compute bins ------------------------------------------------------------ +compute_bins <- function(x, scale = NULL, breaks = NULL, binwidth = NULL, bins = NULL, + center = NULL, boundary = NULL, + closed = c("right", "left")) { + + range <- if (is.scale(scale)) scale$dimension() else range(x) + check_length(range, 2L) + + if (!is.null(breaks)) { + breaks <- allow_lambda(breaks) + if (is.function(breaks)) { + breaks <- breaks(x) + } + if (is.scale(scale) && !scale$is_discrete()) { + breaks <- scale$transform(breaks) + } + check_numeric(breaks) + bins <- 
bin_breaks(breaks, closed) + return(bins) + } + + check_number_decimal(boundary, allow_infinite = FALSE, allow_null = TRUE) + check_number_decimal(center, allow_infinite = FALSE, allow_null = TRUE) + if (!is.null(boundary) && !is.null(center)) { + cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified.") + } + + if (!is.null(binwidth)) { + binwidth <- allow_lambda(binwidth) + if (is.function(binwidth)) { + binwidth <- binwidth(x) + } + check_number_decimal(binwidth, min = 0, allow_infinite = FALSE) + bins <- bin_breaks_width( + range, binwidth, + center = center, boundary = boundary, closed = closed + ) + return(bins) + } + + bins <- allow_lambda(bins) + if (is.function(bins)) { + bins <- bins(x) + } + check_number_whole(bins, min = 1, allow_infinite = FALSE) + bin_breaks_bins( + range, bins, + center = center, boundary = boundary, closed = closed + ) +} + bin_vector <- function(x, bins, weight = NULL, pad = FALSE) { check_object(bins, is_bins, "a {.cls ggplot2_bins} object") @@ -141,8 +179,7 @@ bin_vector <- function(x, bins, weight = NULL, pad = FALSE) { weight[is.na(weight)] <- 0 } - bin_idx <- cut(x, bins$fuzzy, right = bins$right_closed, - include.lowest = TRUE) + bin_idx <- bin_cut(x, bins) bin_count <- as.numeric(tapply(weight, bin_idx, sum, na.rm = TRUE)) bin_count[is.na(bin_count)] <- 0 @@ -170,6 +207,10 @@ bin_vector <- function(x, bins, weight = NULL, pad = FALSE) { bin_out(bin_count, bin_x, bin_widths) } +bin_cut <- function(x, bins) { + cut(x, bins$fuzzy, right = bins$right_closed, include.lowest = TRUE) +} + bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), xmin = x - width / 2, xmax = x + width / 2) { density <- count / width / sum(abs(count)) @@ -186,3 +227,41 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), .size = length(count) ) } + +bin_loc <- function(x, id) { + left <- x[-length(x)] + right <- x[-1] + + list( + left = left[id], + right = right[id], + mid = ((left + 
right) / 2)[id], + length = diff(x)[id] + ) +} + +fix_bin_params <- function(params, fun, version) { + # Translate deprecated `origin`/`right` into `boundary`/`closed`; default to 30 bins. + if (!is.null(params$origin)) { + args <- paste0(fun, c("(origin)", "(boundary)")) + deprecate_warn0(version, args[1], args[2]) + params$boundary <- params$origin # carry the deprecated value over to `boundary` + params$origin <- NULL + } + + if (!is.null(params$right)) { + args <- paste0(fun, c("(right)", "(closed)")) + deprecate_warn0(version, args[1], args[2]) + params$closed <- if (isTRUE(params$right)) "right" else "left" + params$right <- NULL + } + + if (is.null(params$breaks %||% params$binwidth %||% params$bins)) { + cli::cli_inform( + "{.fn {fun}} using {.code bins = 30}. Pick better value with {.arg binwidth}." + ) + params$bins <- 30 + } + + params +} diff --git a/R/geom-bin2d.R b/R/geom-bin2d.R index 5a143df51d..e0c78ab5e4 100644 --- a/R/geom-bin2d.R +++ b/R/geom-bin2d.R @@ -25,7 +25,7 @@ NULL #' # You can control the size of the bins by specifying the number of #' # bins in each direction: #' d + geom_bin_2d(bins = 10) -#' d + geom_bin_2d(bins = 30) +#' d + geom_bin_2d(bins = list(x = 30, y = 10)) #' #' # Or by specifying the width of the bins #' d + geom_bin_2d(binwidth = c(0.1, 0.1)) diff --git a/R/stat-bin.R b/R/stat-bin.R index 9c571ae519..711b0c5ad7 100644 --- a/R/stat-bin.R +++ b/R/stat-bin.R @@ -26,10 +26,11 @@ #' or left edges of bins are included in the bin. #' @param pad If `TRUE`, adds empty bins at either end of x. This ensures #' frequency polygons touch 0. Defaults to `FALSE`. -#' @param keep.zeroes Treatment of zero count bins. If `"all"` (default), such +#' @param drop Treatment of zero count bins. If `"all"` (default), such #' bins are kept as-is. If `"none"`, all zero count bins are filtered out. #' If `"inner"` only zero count bins at the flanks are filtered out, but not -#' in the middle. +#' in the middle. `TRUE` is shorthand for `"all"` and `FALSE` is shorthand +#' for `"none"`. 
#' @eval rd_computed_vars( #' count = "number of points in bin.", #' density = "density of points in bin, scaled to integrate to 1.", @@ -59,7 +60,7 @@ stat_bin <- function(mapping = NULL, data = NULL, closed = c("right", "left"), pad = FALSE, na.rm = FALSE, - keep.zeroes = "all", + drop = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE) { @@ -82,7 +83,7 @@ stat_bin <- function(mapping = NULL, data = NULL, pad = pad, na.rm = na.rm, orientation = orientation, - keep.zeroes = keep.zeroes, + drop = drop, ... ) ) @@ -95,9 +96,13 @@ stat_bin <- function(mapping = NULL, data = NULL, StatBin <- ggproto("StatBin", Stat, setup_params = function(self, data, params) { params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE) - params$keep.zeroes <- arg_match0( - params$keep.zeroes %||% "all", - c("all", "none", "inner"), arg_nm = "keep.zeroes" + + if (is.logical(params$drop)) { + params$drop <- if (isTRUE(params$drop)) "all" else "none" + } + params$drop <- arg_match0( + params$drop %||% "all", + c("all", "none", "inner"), arg_nm = "drop" ) has_x <- !(is.null(data$x) && is.null(params$x)) @@ -118,29 +123,7 @@ StatBin <- ggproto("StatBin", Stat, )) } - if (!is.null(params$drop)) { - deprecate_warn0("2.1.0", "stat_bin(drop)", "stat_bin(pad)") - params$drop <- NULL - } - if (!is.null(params$origin)) { - deprecate_warn0("2.1.0", "stat_bin(origin)", "stat_bin(boundary)") - params$boundary <- params$origin - params$origin <- NULL - } - if (!is.null(params$right)) { - deprecate_warn0("2.1.0", "stat_bin(right)", "stat_bin(closed)") - params$closed <- if (params$right) "right" else "left" - params$right <- NULL - } - if (!is.null(params$boundary) && !is.null(params$center)) { - cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified in {.fn {snake_class(self)}}.") - } - - if (is.null(params$breaks) && is.null(params$binwidth) && is.null(params$bins)) { - cli::cli_inform("{.fn {snake_class(self)}} using {.code bins = 30}. 
Pick better value with {.arg binwidth}.") - params$bins <- 30 - } - + params <- fix_bin_params(params, fun = snake_class(self), version = "2.1.0") params }, @@ -149,33 +132,20 @@ StatBin <- ggproto("StatBin", Stat, compute_group = function(data, scales, binwidth = NULL, bins = NULL, center = NULL, boundary = NULL, closed = c("right", "left"), pad = FALSE, - breaks = NULL, flipped_aes = FALSE, keep.zeroes = "all", + breaks = NULL, flipped_aes = FALSE, drop = "all", # The following arguments are not used, but must # be listed so parameters are computed correctly - origin = NULL, right = NULL, drop = NULL) { + origin = NULL, right = NULL) { x <- flipped_names(flipped_aes)$x - if (!is.null(breaks)) { - if (is.function(breaks)) { - breaks <- breaks(data[[x]]) - } - if (!scales[[x]]$is_discrete()) { - breaks <- scales[[x]]$transform(breaks) - } - bins <- bin_breaks(breaks, closed) - } else if (!is.null(binwidth)) { - if (is.function(binwidth)) { - binwidth <- binwidth(data[[x]]) - } - bins <- bin_breaks_width(scales[[x]]$dimension(), binwidth, - center = center, boundary = boundary, closed = closed) - } else { - bins <- bin_breaks_bins(scales[[x]]$dimension(), bins, center = center, - boundary = boundary, closed = closed) - } + bins <- compute_bins( + data[[x]], scales[[x]], + breaks = breaks, binwidth = binwidth, bins = bins, + center = center, boundary = boundary, closed = closed + ) bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad) keep <- switch( - keep.zeroes, + drop, none = bins$count != 0, inner = inner_runs(bins$count != 0), TRUE diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index bdb69db23a..fe27a41162 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -1,7 +1,4 @@ -#' @param bins numeric vector giving number of bins in both vertical and -#' horizontal directions. Set to 30 by default. -#' @param binwidth Numeric vector giving bin width in both vertical and -#' horizontal directions. Overrides `bins` if both set. 
+#' @inheritParams stat_bin #' @param drop if `TRUE` removes all cells with 0 counts. #' @export #' @rdname geom_bin_2d @@ -11,11 +8,21 @@ #' ncount = "count, scaled to maximum of 1.", #' ndensity = "density, scaled to a maximum of 1." #' ) +#' @section Controlling binning parameters for the x and y directions: +#' The arguments `bins`, `binwidth`, `breaks`, `center`, and `boundary` can +#' be set separately for the x and y directions. When given as a scalar, one +#' value applies to both directions. When given as a vector of length two, +#' the first is applied to the x direction and the second to the y direction. +#' Alternatively, these can be a named list containing `x` and `y` elements, +#' for example `list(x = 10, y = 20)`. stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "tile", position = "identity", ..., bins = 30, binwidth = NULL, + center = NULL, + boundary = NULL, + breaks = NULL, drop = TRUE, na.rm = FALSE, show.legend = NA, @@ -31,6 +38,9 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, params = list2( bins = bins, binwidth = binwidth, + center = center, + boundary = boundary, + breaks = breaks, drop = drop, na.rm = na.rm, ... 
@@ -45,48 +55,37 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, stat_bin2d <- stat_bin_2d #' @rdname ggplot2-ggproto +#' @include stat-summary-2d.R #' @format NULL #' @usage NULL #' @export -StatBin2d <- ggproto("StatBin2d", Stat, +StatBin2d <- ggproto( + "StatBin2d", StatSummary2d, default_aes = aes(weight = 1, fill = after_stat(count)), required_aes = c("x", "y"), compute_group = function(data, scales, binwidth = NULL, bins = 30, - breaks = NULL, origin = NULL, drop = TRUE) { + breaks = NULL, origin = NULL, drop = TRUE, + boundary = NULL, closed = NULL, center = NULL) { - origin <- dual_param(origin, list(NULL, NULL)) - binwidth <- dual_param(binwidth, list(NULL, NULL)) - breaks <- dual_param(breaks, list(NULL, NULL)) - bins <- dual_param(bins, list(x = 30, y = 30)) + data$z <- data$weight %||% 1 + data$weight <- NULL - xbreaks <- bin2d_breaks(scales$x, breaks$x, origin$x, binwidth$x, bins$x) - ybreaks <- bin2d_breaks(scales$y, breaks$y, origin$y, binwidth$y, bins$y) + # For backward compatibility, boundary defaults to 0 + boundary <- boundary %||% if (is.null(center)) list(x = 0, y = 0) - xbin <- cut(data$x, xbreaks, include.lowest = TRUE, labels = FALSE) - ybin <- cut(data$y, ybreaks, include.lowest = TRUE, labels = FALSE) - - if (is.null(data$weight)) - data$weight <- 1 - - out <- tapply_df(data$weight, list(xbin = xbin, ybin = ybin), sum, drop = drop) - - xdim <- bin_loc(xbreaks, out$xbin) - out$x <- xdim$mid - out$width <- xdim$length - - ydim <- bin_loc(ybreaks, out$ybin) - out$y <- ydim$mid - out$height <- ydim$length + out <- StatSummary2d$compute_group( + data, scales, binwidth = binwidth, bins = bins, breaks = breaks, + drop = drop, fun = "sum", boundary = boundary, closed = closed, + center = center + ) out$count <- out$value out$ncount <- out$count / max(out$count, na.rm = TRUE) out$density <- out$count / sum(out$count, na.rm = TRUE) out$ndensity <- out$density / max(out$density, na.rm = TRUE) out - }, - - dropped_aes = "weight" # No longer 
available after transformation + } ) dual_param <- function(x, default = list(x = NULL, y = NULL)) { @@ -102,53 +101,3 @@ dual_param <- function(x, default = list(x = NULL, y = NULL)) { list(x = x, y = x) } } - -bin2d_breaks <- function(scale, breaks = NULL, origin = NULL, binwidth = NULL, - bins = 30, closed = "right") { - # Bins for categorical data should take the width of one level, - # and should show up centered over their tick marks. All other parameters - # are ignored. - if (scale$is_discrete()) { - breaks <- scale$get_breaks() - return(-0.5 + seq_len(length(breaks) + 1)) - } else { - if (!is.null(breaks)) { - breaks <- scale$transform(breaks) - } - } - - if (!is.null(breaks)) - return(breaks) - - range <- scale$get_limits() - - if (is.null(binwidth) || identical(binwidth, NA)) { - binwidth <- diff(range) / bins - } - check_number_decimal(binwidth) - - if (is.null(origin) || identical(origin, NA)) { - origin <- round_any(range[1], binwidth, floor) - } - check_number_decimal(origin) - - breaks <- seq(origin, range[2] + binwidth, binwidth) - - # Check if the last bin lies fully outside the range - if (length(breaks) > 1 && breaks[length(breaks) - 1] >= range[2]) { - breaks <- breaks[-length(breaks)] - } - bins(breaks, closed)$fuzzy -} - -bin_loc <- function(x, id) { - left <- x[-length(x)] - right <- x[-1] - - list( - left = left[id], - right = right[id], - mid = ((left + right) / 2)[id], - length = diff(x)[id] - ) -} diff --git a/R/stat-bindot.R b/R/stat-bindot.R index 85eecc4d54..66184a527c 100644 --- a/R/stat-bindot.R +++ b/R/stat-bindot.R @@ -77,13 +77,11 @@ StatBindot <- ggproto("StatBindot", Stat, } if (method == "histodot") { - closed <- if (right) "right" else "left" - if (!is.null(binwidth)) { - bins <- bin_breaks_width(range, binwidth, boundary = origin, closed = closed) - } else { - bins <- bin_breaks_bins(range, 30, boundary = origin, closed = closed) - } - + bins <- compute_bins( + values, scales[[binaxis]], + breaks = NULL, binwidth = binwidth, 
bins = 30, center = NULL, + boundary = origin, closed = if (right) "right" else "left" + ) data <- bin_vector(values, bins, weight = data$weight, pad = FALSE) # Change "width" column to "binwidth" for consistency diff --git a/R/stat-binhex.R b/R/stat-binhex.R index 0b5d3991c6..be5b61daf7 100644 --- a/R/stat-binhex.R +++ b/R/stat-binhex.R @@ -7,6 +7,13 @@ #' ncount = "count, scaled to maximum of 1.", #' ndensity = "density, scaled to maximum of 1." #' ) +#' @section Controlling binning parameters for the x and y directions: +#' The arguments `bins` and `binwidth` can +#' be set separately for the x and y directions. When given as a scalar, one +#' value applies to both directions. When given as a vector of length two, +#' the first is applied to the x direction and the second to the y direction. +#' Alternatively, these can be a named list containing `x` and `y` elements, +#' for example `list(x = 10, y = 20)`. stat_bin_hex <- function(mapping = NULL, data = NULL, geom = "hex", position = "identity", ..., diff --git a/R/stat-summary-2d.R b/R/stat-summary-2d.R index 60e5e49813..41d0c5b588 100644 --- a/R/stat-summary-2d.R +++ b/R/stat-summary-2d.R @@ -28,6 +28,7 @@ #' @param drop drop if the output of `fun` is `NA`. #' @param fun function for summary. 
#' @param fun.args A list of extra arguments to pass to `fun` +#' @inheritSection stat_bin_2d Controlling binning parameters for the x and y directions #' @export #' @examples #' d <- ggplot(diamonds, aes(carat, depth, z = price)) @@ -92,31 +93,50 @@ StatSummary2d <- ggproto("StatSummary2d", Stat, required_aes = c("x", "y", "z"), dropped_aes = "z", # z gets dropped during statistical transformation + setup_params = function(self, data, params) { + + if (is.character(params$drop)) { + params$drop <- !identical(params$drop, "none") + } + + params <- fix_bin_params(params, fun = snake_class(self), version = "3.5.2") + vars <- c("origin", "binwidth", "breaks", "center", "boundary") + params[vars] <- lapply(params[vars], dual_param, default = NULL) + params$closed <- dual_param(params$closed, list(x = "right", y = "right")) + + params + }, + compute_group = function(data, scales, binwidth = NULL, bins = 30, breaks = NULL, origin = NULL, drop = TRUE, - fun = "mean", fun.args = list()) { - origin <- dual_param(origin, list(NULL, NULL)) - binwidth <- dual_param(binwidth, list(NULL, NULL)) - breaks <- dual_param(breaks, list(NULL, NULL)) + fun = "mean", fun.args = list(), + boundary = 0, closed = NULL, center = NULL) { bins <- dual_param(bins, list(x = 30, y = 30)) - xbreaks <- bin2d_breaks(scales$x, breaks$x, origin$x, binwidth$x, bins$x) - ybreaks <- bin2d_breaks(scales$y, breaks$y, origin$y, binwidth$y, bins$y) - - xbin <- cut(data$x, xbreaks, include.lowest = TRUE, labels = FALSE) - ybin <- cut(data$y, ybreaks, include.lowest = TRUE, labels = FALSE) + xbin <- compute_bins( + data$x, scales$x, breaks$x, binwidth$x, bins$x, + center$x, boundary$x, closed$x + ) + ybin <- compute_bins( + data$y, scales$y, breaks$y, binwidth$y, bins$y, + center$y, boundary$y, closed$y + ) + cut_id <- list( + xbin = as.integer(bin_cut(data$x, xbin)), + ybin = as.integer(bin_cut(data$y, ybin)) + ) fun <- as_function(fun) f <- function(x) { inject(fun(x, !!!fun.args)) } - out <- 
tapply_df(data$z, list(xbin = xbin, ybin = ybin), f, drop = drop) + out <- tapply_df(data$z, cut_id, f, drop = drop) - xdim <- bin_loc(xbreaks, out$xbin) + xdim <- bin_loc(xbin$breaks, out$xbin) out$x <- xdim$mid out$width <- xdim$length - ydim <- bin_loc(ybreaks, out$ybin) + ydim <- bin_loc(ybin$breaks, out$ybin) out$y <- ydim$mid out$height <- ydim$length diff --git a/R/stat-summary-bin.R b/R/stat-summary-bin.R index a56bea189e..e3db18b102 100644 --- a/R/stat-summary-bin.R +++ b/R/stat-summary-bin.R @@ -79,16 +79,21 @@ StatSummaryBin <- ggproto("StatSummaryBin", Stat, compute_group = function(data, scales, fun = NULL, bins = 30, binwidth = NULL, breaks = NULL, origin = NULL, right = FALSE, na.rm = FALSE, - flipped_aes = FALSE, width = NULL) { - data <- flip_data(data, flipped_aes) + flipped_aes = FALSE, width = NULL, center = NULL, + boundary = NULL, closed = c("right", "left")) { + x <- flipped_names(flipped_aes)$x - breaks <- bin2d_breaks(scales[[x]], breaks, origin, binwidth, bins, - closed = if (right) "right" else "left") + bins <- compute_bins( + data[[x]], scales[[x]], + breaks = breaks, binwidth = binwidth, bins = bins, + center = center, boundary = boundary, closed = closed + ) + data$bin <- bin_cut(data[[x]], bins) - data$bin <- cut(data$x, breaks, include.lowest = TRUE, labels = FALSE) + data <- flip_data(data, flipped_aes) out <- dapply(data, "bin", fun %||% function(df) mean_se(df$y)) - locs <- bin_loc(breaks, out$bin) + locs <- bin_loc(bins$breaks, out$bin) out$x <- locs$mid out$width <- width %||% if (scales[[x]]$is_discrete()) 0.9 else locs$length out$flipped_aes <- flipped_aes diff --git a/man/geom_bin_2d.Rd b/man/geom_bin_2d.Rd index fa3b32b4ce..dc0b9ce082 100644 --- a/man/geom_bin_2d.Rd +++ b/man/geom_bin_2d.Rd @@ -26,6 +26,9 @@ stat_bin_2d( ..., bins = 30, binwidth = NULL, + center = NULL, + boundary = NULL, + breaks = NULL, drop = TRUE, na.rm = FALSE, show.legend = NA, @@ -115,11 +118,33 @@ the default plot specification, e.g. 
\code{\link[=borders]{borders()}}.} these connections, see how the \link[=layer_stats]{stat} and \link[=layer_geoms]{geom} arguments work.} -\item{bins}{numeric vector giving number of bins in both vertical and -horizontal directions. Set to 30 by default.} - -\item{binwidth}{Numeric vector giving bin width in both vertical and -horizontal directions. Overrides \code{bins} if both set.} +\item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.} + +\item{binwidth}{The width of the bins. Can be specified as a numeric value +or as a function that takes x after scale transformation as input and +returns a single numeric value. When specifying a function along with a +grouping structure, the function will be called once per group. +The default is to use the number of bins in \code{bins}, +covering the range of the data. You should always override +this value, exploring multiple widths to find the best to illustrate the +stories in your data. + +The bin width of a date variable is the number of days in each time; the +bin width of a time variable is the number of seconds.} + +\item{center, boundary}{bin position specifiers. Only one, \code{center} or +\code{boundary}, may be specified for a single plot. \code{center} specifies the +center of one of the bins. \code{boundary} specifies the boundary between two +bins. Note that if either is above or below the range of the data, things +will be shifted by the appropriate integer multiple of \code{binwidth}. +For example, to center on integers use \code{binwidth = 1} and \code{center = 0}, even +if \code{0} is outside the range of the data. Alternatively, this same alignment +can be specified with \code{binwidth = 1} and \code{boundary = 0.5}, even if \code{0.5} is +outside the range of the data.} + +\item{breaks}{Alternatively, you can supply a numeric vector giving +the bin boundaries. Overrides \code{binwidth}, \code{bins}, \code{center}, +and \code{boundary}. 
Can also be a function that takes group-wise values as input and returns bin boundaries.} \item{drop}{if \code{TRUE} removes all cells with 0 counts.} } @@ -153,6 +178,16 @@ These are calculated by the 'stat' part of layers and can be accessed with \link } } +\section{Controlling binning parameters for the x and y directions}{ + +The arguments \code{bins}, \code{binwidth}, \code{breaks}, \code{center}, and \code{boundary} can +be set separately for the x and y directions. When given as a scalar, one +value applies to both directions. When given as a vector of length two, +the first is applied to the x direction and the second to the y direction. +Alternatively, these can be a named list containing \code{x} and \code{y} elements, +for example \code{list(x = 10, y = 20)}. +} + \examples{ d <- ggplot(diamonds, aes(x, y)) + xlim(4, 10) + ylim(4, 10) d + geom_bin_2d() @@ -160,7 +195,7 @@ d + geom_bin_2d() # You can control the size of the bins by specifying the number of # bins in each direction: d + geom_bin_2d(bins = 10) -d + geom_bin_2d(bins = 30) +d + geom_bin_2d(bins = list(x = 30, y = 10)) # Or by specifying the width of the bins d + geom_bin_2d(binwidth = c(0.1, 0.1)) diff --git a/man/geom_hex.Rd b/man/geom_hex.Rd index 553787761b..da103b0e3c 100644 --- a/man/geom_hex.Rd +++ b/man/geom_hex.Rd @@ -112,11 +112,19 @@ the default plot specification, e.g. \code{\link[=borders]{borders()}}.} \code{stat_bin_hex()}. For more information about overriding these connections, see how the \link[=layer_stats]{stat} and \link[=layer_geoms]{geom} arguments work.} -\item{bins}{numeric vector giving number of bins in both vertical and -horizontal directions. Set to 30 by default.} - -\item{binwidth}{Numeric vector giving bin width in both vertical and -horizontal directions. Overrides \code{bins} if both set.} +\item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.} + +\item{binwidth}{The width of the bins. 
Can be specified as a numeric value +or as a function that takes x after scale transformation as input and +returns a single numeric value. When specifying a function along with a +grouping structure, the function will be called once per group. +The default is to use the number of bins in \code{bins}, +covering the range of the data. You should always override +this value, exploring multiple widths to find the best to illustrate the +stories in your data. + +The bin width of a date variable is the number of days in each time; the +bin width of a time variable is the number of seconds.} } \description{ Divides the plane into regular hexagons, counts the number of cases in @@ -162,6 +170,16 @@ These are calculated by the 'stat' part of layers and can be accessed with \link } } +\section{Controlling binning parameters for the x and y directions}{ + +The arguments \code{bins} and \code{binwidth} can +be set separately for the x and y directions. When given as a scalar, one +value applies to both directions. When given as a vector of length two, +the first is applied to the x direction and the second to the y direction. +Alternatively, these can be a named list containing \code{x} and \code{y} elements, +for example \code{list(x = 10, y = 20)}. +} + \examples{ d <- ggplot(diamonds, aes(carat, price)) d + geom_hex() diff --git a/man/geom_histogram.Rd b/man/geom_histogram.Rd index 32f9c39610..0a27e87c10 100644 --- a/man/geom_histogram.Rd +++ b/man/geom_histogram.Rd @@ -46,7 +46,7 @@ stat_bin( closed = c("right", "left"), pad = FALSE, na.rm = FALSE, - keep.zeroes = "all", + drop = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE @@ -174,10 +174,11 @@ or left edges of bins are included in the bin.} \item{pad}{If \code{TRUE}, adds empty bins at either end of x. This ensures frequency polygons touch 0. Defaults to \code{FALSE}.} -\item{keep.zeroes}{Treatment of zero count bins. If \code{"all"} (default), such +\item{drop}{Treatment of zero count bins. 
If \code{"all"} (default), such bins are kept as-is. If \code{"none"}, all zero count bins are filtered out. If \code{"inner"} only zero count bins at the flanks are filtered out, but not -in the middle.} +in the middle. \code{TRUE} is shorthand for \code{"all"} and \code{FALSE} is shorthand +for \code{"none"}.} } \description{ Visualise the distribution of a single continuous variable by dividing diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index ea01c29996..6658fdafb9 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -19,14 +19,14 @@ % R/position-dodge.R, R/position-dodge2.R, R/position-identity.R, % R/position-jitter.R, R/position-jitterdodge.R, R/position-nudge.R, % R/position-stack.R, R/scale-.R, R/scale-binned.R, R/scale-continuous.R, -% R/scale-date.R, R/scale-discrete-.R, R/scale-identity.R, -% R/stat-align.R, R/stat-bin.R, R/stat-bin2d.R, R/stat-bindot.R, +% R/scale-date.R, R/scale-discrete-.R, R/scale-identity.R, R/stat-align.R, +% R/stat-bin.R, R/stat-summary-2d.R, R/stat-bin2d.R, R/stat-bindot.R, % R/stat-binhex.R, R/stat-boxplot.R, R/stat-contour.R, R/stat-count.R, % R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, R/stat-ellipse.R, % R/stat-function.R, R/stat-identity.R, R/stat-manual.R, R/stat-qq-line.R, % R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, R/stat-sum.R, -% R/stat-summary-2d.R, R/stat-summary-bin.R, R/stat-summary-hex.R, -% R/stat-summary.R, R/stat-unique.R, R/stat-ydensity.R +% R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, +% R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} \alias{ggplot2-ggproto} @@ -126,6 +126,7 @@ \alias{ScaleContinuousIdentity} \alias{StatAlign} \alias{StatBin} +\alias{StatSummary2d} \alias{StatBin2d} \alias{StatBindot} \alias{StatBinhex} @@ -146,7 +147,6 @@ \alias{StatQuantile} \alias{StatSmooth} \alias{StatSum} -\alias{StatSummary2d} \alias{StatSummaryBin} \alias{StatSummaryHex} \alias{StatSummary} diff --git 
a/man/stat_summary_2d.Rd b/man/stat_summary_2d.Rd index 9ee4604b65..da62dd0a15 100644 --- a/man/stat_summary_2d.Rd +++ b/man/stat_summary_2d.Rd @@ -113,11 +113,19 @@ lists which parameters it can accept. \link[=draw_key]{key glyphs}, to change the display of the layer in the legend. }} -\item{bins}{numeric vector giving number of bins in both vertical and -horizontal directions. Set to 30 by default.} +\item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.} -\item{binwidth}{Numeric vector giving bin width in both vertical and -horizontal directions. Overrides \code{bins} if both set.} +\item{binwidth}{The width of the bins. Can be specified as a numeric value +or as a function that takes x after scale transformation as input and +returns a single numeric value. When specifying a function along with a +grouping structure, the function will be called once per group. +The default is to use the number of bins in \code{bins}, +covering the range of the data. You should always override +this value, exploring multiple widths to find the best to illustrate the +stories in your data. + +The bin width of a date variable is the number of days in each time; the +bin width of a time variable is the number of seconds.} \item{drop}{drop if the output of \code{fun} is \code{NA}.} @@ -173,6 +181,16 @@ These are calculated by the 'stat' part of layers and can be accessed with \link } } +\section{Controlling binning parameters for the x and y directions}{ + +The arguments \code{bins}, \code{binwidth}, \code{breaks}, \code{center}, and \code{boundary} can +be set separately for the x and y directions. When given as a scalar, one +value applies to both directions. When given as a vector of length two, +the first is applied to the x direction and the second to the y direction. +Alternatively, these can be a named list containing \code{x} and \code{y} elements, +for example \code{list(x = 10, y = 20)}. 
+} + \examples{ d <- ggplot(diamonds, aes(carat, depth, z = price)) d + stat_summary_2d() diff --git a/tests/testthat/_snaps/stat-bin.md b/tests/testthat/_snaps/stat-bin.md index 2b5ee05525..db0b8f44c0 100644 --- a/tests/testthat/_snaps/stat-bin.md +++ b/tests/testthat/_snaps/stat-bin.md @@ -23,51 +23,30 @@ # inputs to binning are checked - Computation failed in `stat_bin()`. - Caused by error in `bins()`: - ! `breaks` must be a vector, not a character vector. + `breaks` must be a vector, not a character vector. --- - `x_range` must be a vector of length 2, not length 1. + `binwidth` must be a number, not a character vector. --- - Computation failed in `stat_bin()`. - Caused by error in `bin_breaks_width()`: - ! `binwidth` must be a number, not a character vector. + `binwidth` must be a number larger than or equal to 0, not the number -4. --- - Computation failed in `stat_bin()`. - Caused by error in `bin_breaks_width()`: - ! `binwidth` must be a number larger than or equal to 0, not the number -4. - ---- - - `x_range` must be a vector of length 2, not length 1. - ---- - - Computation failed in `stat_bin()`. - Caused by error in `bin_breaks_bins()`: - ! `bins` must be a whole number larger than or equal to 1, not the number -4. + `bins` must be a whole number larger than or equal to 1, not the number -4. # setting boundary and center - Code - comp_bin(df, boundary = 5, center = 0) - Condition - Error in `stat_bin()`: - ! Problem while computing stat. - i Error occurred in the 1st layer. - Caused by error in `setup_params()`: - ! Only one of `boundary` and `center` may be specified in `stat_bin()`. + Computation failed in `stat_bin()`. + Caused by error in `compute_bins()`: + ! Only one of `boundary` and `center` may be specified. # bin errors at high bin counts Code - bin_breaks_width(c(1, 2e+06), 1) + compute_bins(c(1, 2e+06), binwidth = 1) Condition Error in `bin_breaks_width()`: ! The number of histogram bins must be less than 1,000,000. 
diff --git a/tests/testthat/_snaps/stat-bin2d.md b/tests/testthat/_snaps/stat-bin2d.md index ffc60d7f92..a0bb2eebc7 100644 --- a/tests/testthat/_snaps/stat-bin2d.md +++ b/tests/testthat/_snaps/stat-bin2d.md @@ -1,12 +1,12 @@ # binwidth is respected Computation failed in `stat_bin2d()`. - Caused by error in `bin2d_breaks()`: + Caused by error in `compute_bins()`: ! `binwidth` must be a number, not a double vector. --- Computation failed in `stat_bin2d()`. - Caused by error in `bin2d_breaks()`: - ! `origin` must be a number, not a double vector. + Caused by error in `compute_bins()`: + ! `boundary` must be a number or `NULL`, not a double vector. diff --git a/tests/testthat/test-stat-bin.R b/tests/testthat/test-stat-bin.R index a114748daf..3df87821b8 100644 --- a/tests/testthat/test-stat-bin.R +++ b/tests/testthat/test-stat-bin.R @@ -118,17 +118,17 @@ test_that("stat_bin() provides width (#3522)", { expect_equal(out$xmax - out$xmin, rep(binwidth, 10)) }) -test_that("stat_bin(keep.zeroes) options work as intended", { +test_that("stat_bin(drop) options work as intended", { p <- ggplot(data.frame(x = c(1, 2, 2, 3, 5, 6, 6, 7)), aes(x)) + scale_x_continuous(limits = c(-1, 9)) - ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "all")) + ld <- layer_data(p + geom_histogram(binwidth = 1, drop = "all")) expect_equal(ld$x, -1:9) - ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "inner")) + ld <- layer_data(p + geom_histogram(binwidth = 1, drop = "inner")) expect_equal(ld$x, c(1:7)) - ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "none")) + ld <- layer_data(p + geom_histogram(binwidth = 1, drop = "none")) expect_equal(ld$x, c(1:3, 5:7)) }) @@ -147,19 +147,19 @@ test_that("bins is strictly adhered to", { # Default case nbreaks <- vapply(nbins, function(bins) { - length(bin_breaks_bins(c(0, 10), bins)$breaks) + length(compute_bins(c(0, 10), bins = bins)$breaks) }, numeric(1)) expect_equal(nbreaks, nbins + 1) # Center is provided 
nbreaks <- vapply(nbins, function(bins) { - length(bin_breaks_bins(c(0, 10), bins, center = 0)$breaks) + length(compute_bins(c(0, 10), bins = bins, center = 0)$breaks) }, numeric(1)) expect_equal(nbreaks, nbins + 1) # Boundary is provided nbreaks <- vapply(nbins, function(bins) { - length(bin_breaks_bins(c(0, 10), bins, boundary = 0)$breaks) + length(compute_bins(c(0, 10), bins = bins, boundary = 0)$breaks) }, numeric(1)) expect_equal(nbreaks, nbins + 1) @@ -172,13 +172,10 @@ comp_bin <- function(df, ...) { test_that("inputs to binning are checked", { dat <- data_frame(x = c(0, 10)) - expect_snapshot_error(comp_bin(dat, breaks = letters)) - expect_snapshot_error(bin_breaks_width(3)) - expect_snapshot_error(comp_bin(dat, binwidth = letters)) - expect_snapshot_error(comp_bin(dat, binwidth = -4)) - - expect_snapshot_error(bin_breaks_bins(3)) - expect_snapshot_error(comp_bin(dat, bins = -4)) + expect_snapshot_error(compute_bins(dat, breaks = letters)) + expect_snapshot_error(compute_bins(dat, binwidth = letters)) + expect_snapshot_error(compute_bins(dat, binwidth = -4)) + expect_snapshot_error(compute_bins(dat, bins = -4)) }) test_that("closed left or right", { @@ -208,14 +205,14 @@ test_that("setting boundary and center", { df <- data_frame(x = c(0, 30)) # Error if both boundary and center are specified - expect_snapshot(comp_bin(df, boundary = 5, center = 0), error = TRUE) + expect_snapshot_warning(comp_bin(df, boundary = 5, center = 0, bins = 30)) res <- comp_bin(df, binwidth = 10, boundary = 0, pad = FALSE) expect_identical(res$count, c(1, 0, 1)) expect_identical(res$xmin[1], 0) expect_identical(res$xmax[3], 30) - res <- comp_bin(df, binwidth = 10, center = 0, pad = FALSE) + res <- comp_bin(df, binwidth = 10, center = 0, boundary = NULL, pad = FALSE) expect_identical(res$count, c(1, 0, 0, 1)) expect_identical(res$xmin[1], df$x[1] - 5) expect_identical(res$xmax[4], df$x[2] + 5) @@ -230,7 +227,7 @@ test_that("weights are added", { }) test_that("bin errors at high bin 
counts", { - expect_snapshot(bin_breaks_width(c(1, 2e6), 1), error = TRUE) + expect_snapshot(compute_bins(c(1, 2e6), binwidth = 1), error = TRUE) }) # stat_count -------------------------------------------------------------- diff --git a/tests/testthat/test-stat-bin2d.R b/tests/testthat/test-stat-bin2d.R index 54d95679c9..6d83448956 100644 --- a/tests/testthat/test-stat-bin2d.R +++ b/tests/testthat/test-stat-bin2d.R @@ -14,7 +14,7 @@ test_that("binwidth is respected", { expect_snapshot_warning(ggplot_build(p)) p <- ggplot(df, aes(x, y)) + - stat_bin_2d(geom = "tile", origin = c(0.25, 0.5, 0.75)) + stat_bin_2d(geom = "tile", boundary = c(0.25, 0.5, 0.75)) expect_snapshot_warning(ggplot_build(p)) })