diff --git a/NEWS.md b/NEWS.md index 138bbaa..9fc9d22 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ to add "inactive" replicates to a design object, so that the matrix of replicate weights has the desired number of columns. +* The function `as_fays_gen_rep_design()` now has a default value of `mse = TRUE`. Setting `mse = FALSE` will produce a warning message, since Fay's generalized replication method can sometimes produce large underestimates of variance when `mse = FALSE`. + * The function `as_random_group_jackknife_design()` now returns an object with class `tbl_svy` if the input was also an object with class `tbl_svy`. # svrep 0.6.3 diff --git a/R/fays_generalized_replication.R b/R/fays_generalized_replication.R index 3f62055..d99f1e7 100644 --- a/R/fays_generalized_replication.R +++ b/R/fays_generalized_replication.R @@ -314,8 +314,10 @@ make_fays_gen_rep_factors <- function( #' for details of the approximation. #' @param compress This reduces the computer memory required to represent the replicate weights and has no #' impact on estimates. -#' @param mse If \code{TRUE}, compute variances from sums of squares around the point estimate from the full-sample weights, +#' @param mse If \code{TRUE} (the default), compute variances from sums of squares around the point estimate from the full-sample weights. #' If \code{FALSE}, compute variances from sums of squares around the mean estimate from the replicate weights. +#' For Fay's generalized replication method, setting \code{mse = FALSE} can potentially +#' lead to large underestimates of variance. #' @return #' A replicate design object, with class \code{svyrep.design}, which can be used with the usual functions, #' such as \code{svymean()} or \code{svyglm()}. 
@@ -415,8 +417,13 @@ as_fays_gen_rep_design <- function(design, variance_estimator = NULL, max_replicates = 500, balanced = TRUE, psd_option = "warn", - mse = getOption("survey.replicates.mse"), + mse = TRUE, compress = TRUE) { + + if (!mse) { + warning("For Fay's generalized replication method, setting `mse = FALSE` may produce large underestimates of variance.") + } + UseMethod("as_fays_gen_rep_design", design) } @@ -490,8 +497,9 @@ as_fays_gen_rep_design.survey.design <- function(design, variance_estimator = NU max_replicates = 500, balanced = TRUE, psd_option = 'warn', - mse = getOption("survey.replicates.mse"), + mse = TRUE, compress = TRUE) { + # Produce a (potentially) compressed survey design object compressed_design_structure <- compress_design(design, vars_to_keep = aux_var_names) @@ -573,7 +581,7 @@ as_fays_gen_rep_design.DBIsvydesign <- function(design, variance_estimator = NUL max_replicates = 500, balanced = TRUE, psd_option = 'warn', - mse = getOption("survey.replicates.mse"), + mse = TRUE, compress = TRUE) { # Produce a (potentially) compressed survey design object diff --git a/man/as_fays_gen_rep_design.Rd b/man/as_fays_gen_rep_design.Rd index 4763cdc..2efe4fb 100644 --- a/man/as_fays_gen_rep_design.Rd +++ b/man/as_fays_gen_rep_design.Rd @@ -12,7 +12,7 @@ as_fays_gen_rep_design( max_replicates = 500, balanced = TRUE, psd_option = "warn", - mse = getOption("survey.replicates.mse"), + mse = TRUE, compress = TRUE ) } @@ -81,8 +81,10 @@ Beaumont and Patak (2012) argue that this overestimation is expected to be small in magnitude. 
See \code{\link[svrep]{get_nearest_psd_matrix}} for details of the approximation.} -\item{mse}{If \code{TRUE}, compute variances from sums of squares around the point estimate from the full-sample weights, -If \code{FALSE}, compute variances from sums of squares around the mean estimate from the replicate weights.} +\item{mse}{If \code{TRUE} (the default), compute variances from sums of squares around the point estimate from the full-sample weights. +If \code{FALSE}, compute variances from sums of squares around the mean estimate from the replicate weights. +For Fay's generalized replication method, setting \code{mse = FALSE} can potentially +lead to large underestimates of variance.} \item{compress}{This reduces the computer memory required to represent the replicate weights and has no impact on estimates.} diff --git a/tests/testthat/test-fays-generalized-replication.R b/tests/testthat/test-fays-generalized-replication.R index e8be717..2f5bc55 100644 --- a/tests/testthat/test-fays-generalized-replication.R +++ b/tests/testthat/test-fays-generalized-replication.R @@ -503,6 +503,18 @@ test_that( } ) +# Warnings for ill-advised choices ----- + +test_that( + desc = "Warning when `mse = FALSE`", { + expect_warning({ + twophase_design$phase1$full |> + as_fays_gen_rep_design("Ultimate Cluster", mse = FALSE) |> + svytotal(x = ~ y1) + }, regexp = "may produce large underestimates") + } +) + # Works for more specialized classes of survey designs ---- test_that(