add type annotations to param docs, closes #243

mjskay · Jul 27, 2024 · 5b77183 · 5b77183
1 parent 650b569
commit 5b77183
Show file tree

Hide file tree

Showing 104 changed files with 1,155 additions and 1,047 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -17,6 +17,11 @@ Major changes:
 * Bin edge openness can now be set via the `right_closed` and `outermost_closed`
   arguments to `density_histogram()` (#238).
 
+Documentation:
+
+* Almost all parameter documentation (except some inherited from other packages)
+  now includes an annotation listing valid data types for that parameter (#243). 
+
 Bug fixes:
 
 * Ensure `point_interval(<distribution>)` supports tidy evaluation on the

diff --git a/R/abstract_geom.R b/R/abstract_geom.R
@@ -65,7 +65,7 @@ AbstractGeom = ggproto("AbstractGeom", Geom,
 
   param_docs = list(
     orientation = glue_doc('
-      Whether this geom is drawn horizontally or vertically. One of:
+      <[string][character]> Whether this geom is drawn horizontally or vertically. One of:
       \\itemize{
         \\item `NA` (default): automatically detect the orientation based on how the aesthetics
           are assigned. Automatic detection works most of the time.
@@ -82,8 +82,8 @@ AbstractGeom = ggproto("AbstractGeom", Geom,
       '),
 
     na.rm = glue_doc('
-      If `FALSE`, the default, missing values are removed with a warning. If `TRUE`, missing
-      values are silently removed.
+      <scalar [logical]> If `FALSE`, the default, missing values are removed with a warning. If `TRUE`,
+      missing values are silently removed.
       ')
   ),
 

diff --git a/R/auto_partial.R b/R/auto_partial.R
@@ -123,10 +123,10 @@ partial_self = function(name = NULL, waivable = TRUE) {
 }
 
 #' @rdname auto_partial
-#' @param f A function
-#' @param name A character string giving the name of the function, to be used
+#' @param f <[function]> Function to automatically partially-apply.
+#' @param name <[string][character]> Name of the function, to be used
 #' when printing.
-#' @param waivable logical: if `TRUE`, optional arguments that get
+#' @param waivable <scalar [logical]> If `TRUE`, optional arguments that get
 #' passed a [waiver()] will keep their default value (or whatever
 #' non-`waiver` value has been most recently partially applied for that
 #' argument).

diff --git a/R/binning_methods.R b/R/binning_methods.R
@@ -11,17 +11,17 @@
 #'
 #' Bins the provided data values using one of several dotplot algorithms.
 #'
-#' @param x numeric vector of x values
-#' @param y numeric vector of y values
-#' @param binwidth bin width
-#' @param heightratio ratio of bin width to dot height
-#' @param stackratio ratio of dot height to vertical distance between dot
-#' centers
+#' @param x <[numeric]> *x* values.
+#' @param y <[numeric]> *y* values (same length as `x`).
+#' @param binwidth <scalar [numeric]> Bin width.
+#' @param heightratio <scalar [numeric]> Ratio of bin width to dot height.
+#' @param stackratio <scalar [numeric]> Ratio of dot height to vertical distance
+#' between dot centers.
 #' @eval rd_param_dots_layout()
 #' @eval rd_param_dots_overlaps()
 #' @eval rd_param_slab_side()
-#' @param orientation Whether the dots are laid out horizontally or vertically.
-#' Follows the naming scheme of [geom_slabinterval()]:
+#' @param orientation <[string][character]> Whether the dots are laid out horizontally
+#' or vertically. Follows the naming scheme of [geom_slabinterval()]:
 #'
 #'   - `"horizontal"` assumes the data values for the dotplot are in the `x`
 #'   variable and that dots will be stacked up in the `y` direction.
@@ -193,11 +193,11 @@ bin_dots = function(x, y, binwidth,
 #' Searches for a nice-looking bin width to use to draw a dotplot such that
 #' the height of the dotplot fits within a given space (`maxheight`).
 #'
-#' @param x numeric vector of values
-#' @param maxheight maximum height of the dotplot
-#' @param heightratio ratio of bin width to dot height
-#' @param stackratio ratio of dot height to vertical distance between dot
-#' centers
+#' @param x <[numeric]> Data values.
+#' @param maxheight <scalar [numeric]> Maximum height of the dotplot.
+#' @param heightratio <scalar [numeric]> Ratio of bin width to dot height.
+#' @param stackratio <scalar [numeric]> Ratio of dot height to vertical distance
+#' between dot centers.
 #' @eval rd_param_dots_layout()
 #'
 #' @details

diff --git a/R/bounder.R b/R/bounder.R
@@ -11,8 +11,8 @@
 #' the order statistics of the sample. Use with the `bounder` argument to [density_bounded()].
 #' @template description-auto-partial-waivable
 #'
-#' @param x numeric vector containing a sample to estimate the bounds of.
-#' @param p scalar in \eqn{[0,1]}: percentile of the order statistic distribution to use
+#' @param x <[numeric]> Sample to estimate the bounds of.
+#' @param p <scalar [numeric]> in \eqn{[0,1]}: Percentile of the order statistic distribution to use
 #'   as the estimate. `p = 1` will return `range(x)`; `p = 0.5` will give the median
 #'   estimate, `p = 0` will give a very wide estimate (effectively treating the
 #'   distribution as unbounded when used with [density_bounded()]).

diff --git a/R/curve_interval.R b/R/curve_interval.R
@@ -20,20 +20,20 @@ globalVariables(".value")
 #' See Mirzargar *et al.* (2014) or Juul *et al.* (2020) for an accessible introduction
 #' to data depth and curve boxplots / functional boxplots.
 #'
-#' @param .data One of:
+#' @param .data <[data.frame] | [rvar][posterior::rvar] | [matrix]> One of:
 #'   - A data frame (or grouped data frame as returned by [dplyr::group_by()])
 #'     that contains draws to summarize.
 #'   - A [posterior::rvar] vector.
 #'   - A matrix; in which case the first dimension should be draws and the second
 #'     dimension values of the curve.
-#' @param ... Bare column names or expressions that, when evaluated in the context of
+#' @param ... <bare [language]> Bare column names or expressions that, when evaluated in the context of
 #' `.data`, represent draws to summarize. If this is empty, then by default all
 #' columns that are not group columns and which are not in `.exclude` (by default
 #' `".chain"`, `".iteration"`, `".draw"`, and `".row"`) will be summarized.
 #' This can be numeric columns, list columns containing numeric vectors, or
 #' [posterior::rvar()]s.
-#' @param .along Which columns are the input values to the function describing the curve (e.g., the "x"
-#' values). Supports [tidyselect][tidyselect::language] syntax. Intervals are calculated jointly with
+#' @param .along <[tidyselect][tidyselect::language]> Which columns are the input values to the function
+#' describing the curve (e.g., the "x" values). Intervals are calculated jointly with
 #' respect to these variables, conditional on all other grouping variables in the data frame. The default
 #' (`NULL`) causes [curve_interval()] to use all grouping variables in the input data frame as the value
 #' for `.along`, which will generate the most conservative intervals. However, if you want to calculate
@@ -42,25 +42,25 @@ globalVariables(".value")
 #' conditional on `g`. To avoid selecting any variables as input values to the function describing the
 #' curve, use `character()`; this will produce conditional intervals only (the result in this case should
 #' be very similar to `median_qi()`). Currently only supported when `.data` is a data frame.
-#' @param .width vector of probabilities to use that determine the widths of the resulting intervals.
-#' If multiple probabilities are provided, multiple rows per group are generated, each with
+#' @param .width <[numeric]> Vector of probabilities to use that determine the widths of the resulting
+#' intervals. If multiple probabilities are provided, multiple rows per group are generated, each with
 #' a different probability interval (and value of the corresponding `.width` column).
-#' @param .interval The method used to calculate the intervals. Currently, all methods rank the curves
-#' using some measure of *data depth*, then create envelopes containing the `.width`% "deepest" curves.
-#' Available methods are:
+#' @param .interval <[string][character]> The method used to calculate the intervals. Currently, all
+#' methods rank the curves using some measure of *data depth*, then create envelopes containing the
+#' `.width`% "deepest" curves. Available methods are:
 #'   - `"mhd"`: mean halfspace depth (Fraiman and Muniz 2001).
 #'   - `"mbd"`: modified band depth (Sun and Genton 2011): calls [fda::fbplot()] with `method = "MBD"`.
 #'   - `"bd"`: band depth (Sun and Genton 2011): calls [fda::fbplot()] with `method = "BD2"`.
 #'   - `"bd-mbd"`: band depth, breaking ties with modified band depth (Sun and Genton 2011): calls
 #'     [fda::fbplot()] with `method = "Both"`.
-#' @param .simple_names When `TRUE` and only a single column / vector is to be summarized, use the
-#' name `.lower` for the lower end of the interval and `.upper` for the
+#' @param .simple_names <scalar [logical]> When `TRUE` and only a single column / vector is to be
+#' summarized, use the name `.lower` for the lower end of the interval and `.upper` for the
 #' upper end. When `FALSE` and `.data` is a data frame,
 #' names the lower and upper intervals for each column `x` `x.lower` and `x.upper`.
-#' @param .exclude A character vector of names of columns to be excluded from summarization
+#' @param .exclude <[character]> Vector of names of columns to be excluded from summarization
 #' if no column names are specified to be summarized. Default ignores several meta-data column
 #' names used in \pkg{ggdist} and \pkg{tidybayes}.
-#' @param na.rm logical value indicating whether `NA` values should be stripped before the computation proceeds.
+#' @param na.rm <scalar [logical]> Should `NA` values be stripped before the computation proceeds?
 #' If `FALSE` (the default), the presence of `NA` values in the columns to be summarized will generally
 #' result in an error. If `TRUE`, `NA` values will be removed in the calculation of intervals so long
 #' as `.interval` is `"mhd"`; other methods do not currently support `na.rm`. Be cautious in applying

diff --git a/R/cut_cdf_qi.R b/R/cut_cdf_qi.R
@@ -14,16 +14,16 @@
 #' done more easily by mapping the `.width` or `level` computed variable to
 #' slab fill or color. See **Examples**.
 #'
-#' @param p A numeric vector of values from a cumulative distribution function,
+#' @param p <[numeric]> Vector of values from a cumulative distribution function,
 #' such as values returned by `p`-prefixed distribution functions in base R (e.g. [pnorm()]),
 #' the [`cdf()`][distributional::cdf] function, or values of the `cdf` computed aesthetic from the
 #' [stat_slabinterval()] family of stats.
-#' @param .width vector of probabilities to use that determine the widths of the resulting intervals.
-#' @param labels One of:
-#'   - `NULL` to use the default labels (`.width` converted to a character vector).
+#' @param .width <[numeric]> Vector of probabilities to use that determine the widths of the resulting intervals.
+#' @param labels <[character] | [function] | [NULL]> One of:
 #'   - A character vector giving labels (must be same length as `.width`)
 #'   - A function that takes numeric probabilities as input and returns labels as output
 #'     (a good candidate might be [scales::percent_format()]).
+#'   - `NULL` to use the default labels (`.width` converted to a character vector).
 #'
 #' @return
 #' An [ordered] factor of the same length as `p` giving the quantile interval to

diff --git a/R/density.R b/R/density.R
@@ -400,14 +400,22 @@ plot.ggdist_density = function(x, ..., ylim = c(0, NA)) {
 #' @template description-auto-partial-waivable
 #'
 #' @inheritDotParams stats::bw.SJ
-#' @param x A numeric vector giving a sample.
+#' @param x <[numeric]> Vector containing a sample.
 #' @details
 #' These are loose wrappers around the corresponding `bw.`-prefixed functions
 #' in \pkg{stats}. See, for example, [bw.SJ()].
 #'
 #' [bandwidth_dpi()], which is the default bandwidth estimator in \pkg{ggdist},
 #' is the Sheather-Jones direct plug-in estimator, i.e. `bw.SJ(..., method = "dpi")`.
 #'
+#' With the exception of [bandwidth_nrd0()], these estimators may fail in some
+#' cases, often when a sample contains many duplicates. If they do, they will
+#' automatically fall back to [bandwidth_nrd0()] with a warning. However, these
+#' failures are typically symptomatic of situations where you should not want to
+#' use a kernel density estimator in the first place (e.g. data with duplicates
+#' and/or discrete data). In these cases consider using a dotplot ([geom_dots()])
+#' or histogram ([density_histogram()]) instead.
+#'
 #' @returns A single number giving the bandwidth
 #' @seealso [density_bounded()], [density_unbounded()].
 #' @name bandwidth

diff --git a/R/geom_blur_dots.R b/R/geom_blur_dots.R
@@ -135,7 +135,8 @@ GeomBlurDots = ggproto("GeomBlurDots", GeomDots,
   ), GeomDots$default_params),
 
   param_docs = defaults(list(
-    blur = glue_doc('Blur function to apply to dots.
+    blur = glue_doc('
+      <[function] | [string][character]> Blur function to apply to dots.
       One of: \\itemize{
         \\item A function that takes a numeric vector of distances from the dot
           center, the dot radius, and the standard deviation of the blur and returns
@@ -236,11 +237,11 @@ geom_blur_dots = make_geom(GeomBlurDots)
 #' Methods for constructing blurs, as used in the `blur` argument to
 #' [geom_blur_dots()] or [stat_mcse_dots()].
 #' @template description-auto-partial-waivable
-#' @param x numeric vector of positive distances from the center of the dot
+#' @param x <[numeric]> Vector of positive distances from the center of the dot
 #' (assumed to be 0) to evaluate blur function at.
-#' @param r radius of the dot that is being blurred.
-#' @param sd standard deviation of the dot that is being blurred.
-#' @param .width for `blur_interval()`, a probability giving the width of
+#' @param r <scalar [numeric]> Radius of the dot that is being blurred.
+#' @param sd <scalar [numeric]> Standard deviation of the dot that is being blurred.
+#' @param .width <scalar [numeric]> For `blur_interval()`, a probability giving the width of
 #' the interval.
 #' @name blur
 #' @details
@@ -264,11 +265,11 @@ geom_blur_dots = make_geom(GeomBlurDots)
 #' dot at 50% opacity, where the interval is a Gaussian quantile interval with
 #' mass equal to `.width` and standard deviation `sd`.
 #' @returns
-#' A vector of length `x` giving the opacity of the radial gradient representing
-#' the dot at each `x` value.
+#' A vector with the same length as `x` giving the opacity of the radial
+#' gradient representing the dot at each `x` value.
 #' @seealso
 #' [geom_blur_dots()] and [stat_mcse_dots()] for geometries making use of
-#' `blur`s.
+#' `blur` functions.
 #' @examples
 #' # see examples in geom_blur_dots()
 #' @importFrom stats pnorm

diff --git a/R/geom_dotsinterval.R b/R/geom_dotsinterval.R
@@ -468,7 +468,8 @@ GeomDotsinterval = ggproto("GeomDotsinterval", GeomSlabinterval,
   ## parameters --------------------------------------------------------------
 
   param_docs = defaults(list(
-    binwidth = glue_doc('The bin width to use for laying out the dots.
+    binwidth = glue_doc('
+      <[numeric] | [unit]> The bin width to use for laying out the dots.
       One of:
         - `NA` (the default): Dynamically select the bin width based on the
           size of the plot when drawn. This will pick a `binwidth` such that the
@@ -488,18 +489,21 @@ GeomDotsinterval = ggproto("GeomDotsinterval", GeomSlabinterval,
       10% of the viewport size (while still ensuring the tallest stack is less than
       or equal to `scale`).
       '),
-    dotsize = glue_doc('The width of the dots relative to the `binwidth`. The default,
+    dotsize = glue_doc('
+      <scalar [numeric]> The width of the dots relative to the `binwidth`. The default,
       `1.07`, makes dots be just a bit wider than the bin width, which is a
       manually-tuned parameter that tends to work well with the default circular
       shape, preventing gaps between bins from appearing to be too large visually
       (as might arise from dots being *precisely* the `binwidth`). If it is desired
       to have dots be precisely the `binwidth`, set `dotsize = 1`.
       '),
-    stackratio = glue_doc('The distance between the center of the dots in the same
+    stackratio = glue_doc('
+      <scalar [numeric]> The distance between the center of the dots in the same
       stack relative to the dot height. The default, `1`, makes dots in the same
       stack just touch each other.
       '),
-    smooth = glue_doc('Smoother to apply to dot positions.
+    smooth = glue_doc('
+      <[function] | [string][character]> Smoother to apply to dot positions.
       One of:
         - A function that takes a numeric vector of dot positions and returns a
           smoothed version of that vector, such as `smooth_bounded()`,
@@ -512,7 +516,8 @@ GeomDotsinterval = ggproto("GeomDotsinterval", GeomSlabinterval,
       Smoothing is most effective when the smoother is matched to the support of
       the distribution; e.g. using `smooth_bounded(bounds = ...)`.
       '),
-    overflow = glue_doc('How to handle overflow of dots beyond the extent of the geom
+    overflow = glue_doc('
+      <[string][character]> How to handle overflow of dots beyond the extent of the geom
       when a minimum `binwidth` (or an exact `binwidth`) is supplied.
       One of:
         - `"keep"`: Keep the overflow, drawing dots outside the geom bounds.
@@ -527,8 +532,8 @@ GeomDotsinterval = ggproto("GeomDotsinterval", GeomSlabinterval,
       with dots overlapping, consider setting `overflow = "compress"` and supplying
       an exact or minimum dot size using `binwidth`.
       '),
-    layout = glue_doc('The layout method used
-      for the dots: \\itemize{
+    layout = glue_doc('
+      <[string][character]> The layout method used for the dots. One of: \\itemize{
         \\item `"bin"` (default): places dots on the off-axis at the midpoint of
           their bins as in the classic Wilkinson dotplot. This maintains the
           alignment of rows and columns in the dotplot. This layout is slightly
@@ -552,7 +557,8 @@ GeomDotsinterval = ggproto("GeomDotsinterval", GeomSlabinterval,
         \\item `"bar"`: for discrete distributions, lays out duplicate values in
           rectangular bars.
       }'),
-    overlaps = glue_doc('How to handle overlapping dots or bins in the `"bin"`,
+    overlaps = glue_doc('
+      <[string][character]> How to handle overlapping dots or bins in the `"bin"`,
       `"weave"`, and `"hex"` layouts (dots never overlap in the `"swarm"` or `"bar"` layouts).
       For the purposes of this argument, dots are only considered to be overlapping
       if they would be overlapping when `dotsize = 1` and `stackratio = 1`; i.e.
@@ -565,7 +571,8 @@ GeomDotsinterval = ggproto("GeomDotsinterval", GeomSlabinterval,
           dots to their desired positions, subject to the constraint that adjacent
           dots do not overlap.
       }'),
-    verbose = glue_doc('If `TRUE`, print out the bin width of the dotplot. Can be useful
+    verbose = glue_doc('
+      <scalar [logical]> If `TRUE`, print out the bin width of the dotplot. Can be useful
       if you want to start from an automatically-selected bin width and then adjust it
       manually. Bin width is printed both as data units and as normalized parent
       coordinates or `"npc"`s (see [unit()]). Note that if you just want to scale the

diff --git a/R/geom_lineribbon.R b/R/geom_lineribbon.R
@@ -152,16 +152,17 @@ GeomLineribbon = ggproto("GeomLineribbon", AbstractGeom,
 
   param_docs = defaults(list(
     step = glue_doc('
-      Should the line/ribbon be drawn as a step function? One of:
+      <scalar [logical] | [string][character]> Should the line/ribbon be drawn
+      as a step function? One of:
       \\itemize{
         \\item `FALSE` (default): do not draw as a step function.
         \\item `"mid"` (or `TRUE`): draw steps midway between adjacent x values.
         \\item `"hv"`: draw horizontal-then-vertical steps.
         \\item `"vh"`: draw as vertical-then-horizontal steps.
       }
-      `TRUE` is an alias for `"mid"` because for a step function with ribbons, `"mid"` is probably what you want
-      (for the other two step approaches the ribbons at either the very first or very last x value will not be
-      visible).
+      `TRUE` is an alias for `"mid"`, because for a step function with ribbons,
+      `"mid"` is a reasonable default (for the other two step approaches the ribbons
+      at either the very first or very last x value will not be visible).
       ')
   ), AbstractGeom$param_docs),