diff --git a/DESCRIPTION b/DESCRIPTION index 6d4bb8b..b8d3c9c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,5 +21,6 @@ Suggests: factoMerger, testthat, knitr, - rmarkdown + rmarkdown, + ResourceSelection VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 5dd3293..e0f1eaf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,22 +1,16 @@ # Generated by roxygen2: do not edit by hand +S3method(extract_xspliner_function,xspliner) S3method(plot,xspliner) S3method(predict,xspliner) S3method(xspline,default) S3method(xspline,explainer) S3method(xspline,formula) -export(extract_formula_var_names) -export(factor_opts_default) -export(get_component_params) -export(get_formula_details) -export(get_formula_raw_components) -export(get_formula_special) -export(get_special_component_details) -export(get_special_components_info) -export(numeric_opts_default) -export(prepare_call) -export(r_squared) -export(transform_formula_chr) -export(transformed_formula_object) +export(aic) +export(extract_xspliner_function) +export(get_special_predictors) +export(hoslem) +export(xf_opts_default) +export(xs_opts_default) export(xspline) importFrom(magrittr,"%>%") diff --git a/R/plot-xspliner.R b/R/plot-xspliner.R index c7d2d16..6bdc200 100644 --- a/R/plot-xspliner.R +++ b/R/plot-xspliner.R @@ -25,7 +25,7 @@ plot.xspliner <- function(x, variable_name = NULL, plot_response = TRUE, } response_var <- environment(x)$response - xp_call <- environment(x)$xs_call[[variable_name]] + xp_call <- environment(x)$xs_functions[[variable_name]] if (plot_data) { plot_range <- range(data[, variable_name]) @@ -35,7 +35,7 @@ plot.xspliner <- function(x, variable_name = NULL, plot_response = TRUE, plot_data_data <- data - plot_response_data <- environment(x)$xs_env_list[[variable_name]]$blackbox_response_obj + plot_response_data <- environment(x)$quantitative_transitions[[variable_name]]$effect_outcome colnames(plot_response_data) <- c(variable_name, response_var) x_approx <- seq(plot_range[1], plot_range[2], length.out = nrow(plot_response_data)) diff --git a/R/predict-xspliner.R b/R/predict-xspliner.R index 8547675..cb29efe 100644 --- a/R/predict-xspliner.R +++ b/R/predict-xspliner.R @@ -5,5 +5,5 @@ #' #' @export predict.xspliner <- function(xspliner, newdata) { - mgcv::predict.gam(xspliner, newdata = newdata) + stats::predict.glm(xspliner, newdata = newdata) } diff --git a/R/utils-approximations.R b/R/utils-approximations.R index b4c6af0..5eb1d90 100644 --- a/R/utils-approximations.R +++ b/R/utils-approximations.R @@ -1,18 +1,18 @@ -get_spline_formula <- function(response_var, pred_var, env, ...) { - formula_call <- substitute(list(pred_var, ...)) +build_approximation_formula <- function(response, predictor, env, ...) { + formula_call <- substitute(list(predictor, ...)) formula_call[[1]] <- quote(s) formula_call[[2]] <- quote(predictor) - formula_call <- sub("predictor", pred_var, deparse(formula_call), fixed = TRUE) - formula <- as.formula(sprintf("%s ~ %s", response_var, formula_call), env = env) + formula_call <- sub("predictor", predictor, deparse(formula_call), fixed = TRUE) + formula <- as.formula(sprintf("%s ~ %s", response, formula_call), env = env) formula } #' Approximate spline on data #' #' It aproximates data with spline function by fitting GAM model. -#' @param bb_response_data Blackbox response data, for example pdp curve. -#' @param response_var Name of response value from bb_response_data. -#' @param pred_var Name of predictor value from bb_response_data. +#' @param effect_data Blackbox response data, for example pdp curve. +#' @param response Name of response value from effect_data. +#' @param predictor Name of predictor value from effect_data. #' @param env Formula environment that should be used for fitting gam model. #' @param ... Other arguments passed to \link{mgcv::s} function. #' @return @@ -22,20 +22,21 @@ get_spline_formula <- function(response_var, pred_var, env, ...) { #' y <- rnorm(20, 2, 2) #' env <- new.env() #' approx_with_spline(data.frame(x = x, y = y), "y", "x", env) -approx_with_spline <- function(bb_response_data, response_var, pred_var, env = parent.frame(), ...) { +approx_with_spline <- function(effect_data, response, predictor, env = parent.frame(), ...) { s <- mgcv::s - formula <- get_spline_formula(response_var, pred_var, env, ...) - mgcv::gam(formula, data = bb_response_data) + formula <- build_approximation_formula(response, predictor, env, ...) + mgcv::gam(formula, data = effect_data) } #' Approximate monotonic spline on data #' #' It aproximates data with monotonic spline function by fitting GAM model. -#' @param bb_response_data. Blackbox response data, for example pdp curve. -#' @param response_var Name of response value from bb_response_data. -#' @param pred_var Name of predictor value from bb_response_data. +#' @param effect_data. Blackbox response data, for example pdp curve. +#' @param response Name of response value from effect_data. +#' @param predictor Name of predictor value from effect_data. #' @param env Formula environment that should be used for fitting gam model. -#' @param increasing If TRUE, spline approximation is increasing, if FALSE decreasing. +#' @param monotonic Possible options "up", "down" and "auto. If up the spline is incresing, when down decreasing. +#' For auto the better one (based on r.squared statistic) is chosen. #' @param ... Other arguments passed to \link{mgcv::s} function. #' @return #' Object of class "gam". See \link{mgcv::gamObject} @@ -43,22 +44,32 @@ approx_with_spline <- function(bb_response_data, response_var, pred_var, env = p #' x <- sort(rnorm(20, 5, 5)) #' y <- rnorm(20, 2, 2) #' env <- new.env() -#' approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, TRUE) -approx_with_monotonic_spline <- function(bb_response_data, response_var, - pred_var, env = parent.frame(), increasing, ...) { +#' approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, "up") +approx_with_monotonic_spline <- function(effect_data, response, + predictor, env = parent.frame(), monotonic, ...) { + if (monotonic == "auto") { + model_up <- approx_with_monotonic_spline(effect_data, response, predictor, env = parent.frame(), "up", ...) + model_down <- approx_with_monotonic_spline(effect_data, response, predictor, env = parent.frame(), "down", ...) + if (summary(model_up)$r.sq > summary(model_up)$r.sq) { + return(model_up) + } else { + return(model_down) + } + } + s <- mgcv::s - formula <- get_spline_formula(response_var, pred_var, env, ...) - G <- mgcv::gam(formula, data = bb_response_data, fit = FALSE) - contraint_sign <- if (increasing) 1 else -1 + formula <- build_approximation_formula(response, predictor, env, ...) + G <- mgcv::gam(formula, data = effect_data, fit = FALSE) + contraint_sign <- if (monotonic == "up") 1 else -1 gam_init <- mgcv::gam(G = G) ## generate constraints, by finite differencing ## using predict.gam .... eps <- 1e-7 - x_range <- range(bb_response_data[[pred_var]]) + x_range <- range(effect_data[[predictor]]) diff_grid_0 <- diff_grid_1 <- data.frame(x = seq(x_range[1], x_range[2], length.out = 100)) - colnames(diff_grid_0) <- colnames(diff_grid_1) <- pred_var - diff_grid_1$x <- diff_grid_1[[pred_var]] + eps + colnames(diff_grid_0) <- colnames(diff_grid_1) <- predictor + diff_grid_1$x <- diff_grid_1[[predictor]] + eps spline_vals_on_interv_start <- predict(gam_init, newdata = diff_grid_0, type = "lpmatrix") spline_vals_on_interv_end <- predict(gam_init, newdata = diff_grid_1, type = "lpmatrix") x_var_constraints <- contraint_sign * (spline_vals_on_interv_end - spline_vals_on_interv_start) / eps ## Xx %*% coef(b) must be positive @@ -75,148 +86,200 @@ approx_with_monotonic_spline <- function(bb_response_data, response_var, gam_init } -prepare_pdp_params <- function() { - -} - -prepare_spline_params_pdp <- function(formula_details, component_details, blackbox, data) { - method_params <- component_details$method_opts - method_params[["type"]] <- NULL - method_params[["object"]] <- blackbox - method_params[["pred.var"]] <- component_details$var - method_params[["train"]] <- data - method_params[["which.class"]] <- 2 # for glm 1st level is failure +prepare_transition_params_pdp <- function(formula_metadata, component_details, blackbox, data) { + effect <- component_details$effect + effect[["type"]] <- NULL + effect[["object"]] <- blackbox + effect[["pred.var"]] <- component_details$var + effect[["train"]] <- data + effect[["which.class"]] <- 2 # for glm 1st level is failure - blackbox_response_obj <- do.call(pdp::partial, method_params) + effect_outcome <- do.call(pdp::partial, effect) - spline_params <- component_details$transform_opts - spline_params[["bb_response_data"]] <- blackbox_response_obj - spline_params[["pred_var"]] <- component_details$var - spline_params[["response_var"]] <- "yhat" - spline_params[["env"]] <- attr(formula_details$formula, ".Environment") + transition <- component_details$transition + transition[["effect_data"]] <- effect_outcome + transition[["predictor"]] <- component_details$var + transition[["response"]] <- "yhat" + transition[["env"]] <- attr(formula_metadata$formula, ".Environment") - spline_params + transition } -prepare_spline_params_ale <- function(formula_details, component_details, blackbox, data) { - method_params <- component_details$method_opts - method_params[["type"]] <- NULL - method_params[["X.model"]] <- blackbox - method_params[["J"]] <- component_details$var - method_params[["X"]] <- data - method_params[["pred.fun"]] <- function(X.model, newdata) predict(object = X.model, newdata = newdata) - method_params[["NA.plot"]] <- FALSE +prepare_transition_params_ale <- function(formula_metadata, component_details, blackbox, data) { + effect <- component_details$effect + effect[["type"]] <- NULL + effect[["X.model"]] <- blackbox + effect[["J"]] <- component_details$var + effect[["X"]] <- data + effect[["pred.fun"]] <- function(X.model, newdata) predict(object = X.model, newdata = newdata) + effect[["NA.plot"]] <- FALSE plot_container <- tempfile() pdf(plot_container) - blackbox_response_obj <- do.call(ALEPlot::ALEPlot, method_params) + effect_outcome <- do.call(ALEPlot::ALEPlot, effect) dev.off() unlink(plot_container) - blackbox_response_obj <- data.frame(blackbox_response_obj$x.values, blackbox_response_obj$f.values) - names(blackbox_response_obj) <- c(component_details$var, "yhat") + effect_outcome <- data.frame(effect_outcome$x.values, effect_outcome$f.values) + names(effect_outcome) <- c(component_details$var, "yhat") - spline_params <- component_details$transform_opts - spline_params[["bb_response_data"]] <- blackbox_response_obj - spline_params[["pred_var"]] <- component_details$var - spline_params[["response_var"]] <- "yhat" - spline_params[["env"]] <- attr(formula_details$formula, ".Environment") + transition <- component_details$transition + transition[["effect_data"]] <- effect_outcome + transition[["predictor"]] <- component_details$var + transition[["response"]] <- "yhat" + transition[["env"]] <- attr(formula_metadata$formula, ".Environment") - spline_params + transition } -prepare_transform_params_fM <- function(formula_details, component_details, blackbox, data) { - method_params <- component_details$method_opts - method_params[["type"]] <- NULL - method_params[["object"]] <- blackbox - method_params[["pred.var"]] <- component_details$var - method_params[["train"]] <- data - method_params[["ice"]] <- TRUE +prepare_transition_params_fM <- function(formula_metadata, component_details, blackbox, data) { + effect <- component_details$effect + effect[["type"]] <- NULL + effect[["object"]] <- blackbox + effect[["pred.var"]] <- component_details$var + effect[["train"]] <- data + effect[["ice"]] <- TRUE - blackbox_response_obj <- do.call(pdp::partial, method_params) + effect_outcome <- do.call(pdp::partial, effect) - transform_params <- component_details$method_opts - transform_params[["response"]] <- blackbox_response_obj[, "yhat"] - transform_params[["factor"]] <- blackbox_response_obj[, component_details$var] - transform_params[["factorMerger"]] <- blackbox_response_obj + transition <- component_details$transition + transition[["response"]] <- effect_outcome[, "yhat"] + transition[["factor"]] <- effect_outcome[, component_details$var] + transition[["factorMerger"]] <- effect_outcome - transform_params + transition } -factor_component_env <- function(formula_details, component_details, blackbox, data) { - if (is.null(component_details$method_opts$type)) { +get_qualitative_transition <- function(formula_metadata, component_details, blackbox, data) { + if (is.null(component_details$effect$type)) { stop("No specified type for method!") } - transform_params <- switch(component_details$method_opts$type, - ice = prepare_transform_params_fM(formula_details, component_details, blackbox, data) + transition <- switch(component_details$effect$type, + fM = prepare_transition_params_fM(formula_metadata, component_details, blackbox, data) ) + alter <- transition[["alter"]] + transition[["alter"]] <- NULL + + if (alter == "never") { + quantitative_transition <- list( + effect_outcome = NULL, + transition_outcome = NULL + ) + } else { + partition_params <- transition[c("stat", "value")] %>% + purrr::keep(~ !is.null(.)) + transition[c("stat", "value")] <- NULL + transition$abbreviate <- FALSE - partition_params <- transform_params[c("stat", "value")] %>% - purrr::keep(~ !is.null(.)) - transform_params[c("stat", "value")] <- NULL - transform_params$abbreviate <- FALSE - - blackbox_response_obj <- do.call(factorMerger::mergeFactors, transform_params) + effect_outcome <- do.call(factorMerger::mergeFactors, transition) - partition_params$factorMerger <- blackbox_response_obj - blackbox_response_transform <- do.call(factorMerger::getOptimalPartitionDf, partition_params) + partition_params$factorMerger <- effect_outcome + transition_outcome <- do.call(factorMerger::getOptimalPartitionDf, partition_params) - list( - blackbox_response_obj = blackbox_response_obj, - blackbox_response_transform = blackbox_response_transform - ) + quantitative_transition <- list( + effect_outcome = transition[["effect_data"]], + transition_outcome = transition_outcome + ) + } + quantitative_transition } -numeric_component_env <- function(formula_details, component_details, blackbox, data) { - if (is.null(component_details$method_opts$type)) { +get_quantitative_transition <- function(formula_metadata, component_details, blackbox, data, + family, compare_stat) { + if (is.null(component_details$effect$type)) { stop("No specified type for method!") } - spline_params <- switch(component_details$method_opts$type, - pdp = prepare_spline_params_pdp(formula_details, component_details, blackbox, data), - ale = prepare_spline_params_ale(formula_details, component_details, blackbox, data) + transition <- switch(component_details$effect$type, + pdp = prepare_transition_params_pdp(formula_metadata, component_details, blackbox, data), + ale = prepare_transition_params_ale(formula_metadata, component_details, blackbox, data) ) - if (is.null(spline_params[["increasing"]])) { - blackbox_response_approx <- do.call(approx_with_spline, spline_params) + monotonic <- transition[["monotonic"]] + transition[["monotonic"]] <- NULL + alter <- transition[["alter"]] + transition[["alter"]] <- NULL + + if (alter == "never") { + quantitative_transition <- list( + effect_outcome = NULL, + transition_outcome = NULL + ) } else { - blackbox_response_approx <- do.call(approx_with_monotonic_spline, spline_params) + if (monotonic == "not") { + transition_outcome <- do.call(approx_with_spline, transition) + } else { # (todo) add auto option + transition_outcome <- do.call(approx_with_monotonic_spline, transition) + } + + quantitative_transition <- list( + effect_outcome = transition[["effect_data"]], + transition_outcome = transition_outcome + ) } - list( - blackbox_response_obj = spline_params[["bb_response_data"]], - blackbox_response_approx = blackbox_response_approx - ) + if (alter == "auto") { + xs_function <- build_xs_function(quantitative_transition, component_details$var) + lm_better <- is_lm_better_than_xs( + data, formula_metadata$lhs, component_details$var, xs_function, family, compare_stat) + if (lm_better) { + message(sprintf("%s fits better than %s. Using bare component.", + component_details$var, component_details$new_call)) + quantitative_transition <- list( + effect_outcome = NULL, + transition_outcome = NULL + ) + } + } + quantitative_transition } -get_common_components_env <- function(formula_details, special_components_details, blackbox, data) { +is_lm_better_than_xs <- function(data, lhs, var, xs_function, family, compare_stat) { + xs_function <- xs_function + linear_formula <- as.formula(sprintf("%s ~ %s", lhs, var)) + transition_formula <- as.formula(sprintf("%s ~ xs_function(%s)", lhs, var)) + linear_model <- glm(linear_formula, data = data, family = family) + transition_model <- glm(transition_formula, data = data, family = family) + comparison <- compare_stat(transition_model) <= compare_stat(linear_model) + if (isTRUE(attr(compare_stat, "higher-better"))) { + lm_better <- comparison + } else { + lm_better <- !comparison + } +} - xs_env <- list() - xf_env <- list() +get_transitions_outcome <- function(formula_metadata, special_components_details, + blackbox, data, family, compare_stat) { - xs_vars <- formula_details$xs_variables - xf_vars <- formula_details$xf_variables + quantitative_variables <- formula_metadata$xs_variables + qualitative_variables <- formula_metadata$xf_variables - if (length(xs_vars)) { - xs_env <- special_components_details %>% - purrr::keep(function(component_details) component_details[["var"]] %in% xs_vars) %>% - purrr::map(function(component_details) numeric_component_env(formula_details, component_details, blackbox, data)) %>% - purrr::set_names(xs_vars) + quantitative <- if (length(quantitative_variables)) { + special_components_details %>% + purrr::keep(function(component_details) component_details[["var"]] %in% quantitative_variables) %>% + purrr::map(function(component_details) get_quantitative_transition( + formula_metadata, component_details, blackbox, data, family, compare_stat)) %>% + purrr::set_names(quantitative_variables) + } else { + list() } - if (length(xf_vars)) { - xf_env <- special_components_details %>% - purrr::keep(function(component_details) component_details[["var"]] %in% xf_vars) %>% - purrr::map(function(component_details) factor_component_env(formula_details, component_details, blackbox, data)) %>% - purrr::set_names(xf_vars) + qualitative <- if (length(qualitative_variables)) { + special_components_details %>% + purrr::keep(function(component_details) component_details[["var"]] %in% qualitative_variables) %>% + purrr::map(function(component_details) get_qualitative_transition( + formula_metadata, component_details, blackbox, data)) %>% + purrr::set_names(qualitative_variables) + } else { + list() } list( - xs_env = xs_env, - xf_env = xf_env + quantitative = quantitative, + qualitative = qualitative ) } @@ -238,10 +301,10 @@ get_common_components_env <- function(formula_details, special_components_detail #' z_var_response <- pdp::partial(blackbox, "z") #' z_var_response_approx <- approx_with_spline(z_var_response, "yhat", "z") #' z_env <- list( -#' blackbox_response_obj = z_var_response, -#' blackbox_response_approx = z_var_response_approx +#' effect_outcome = z_var_response, +#' transition_outcome = z_var_response_approx #' ) -#' z_var_spline <- get_xs_call(z_env, "z") +#' z_var_spline <- build_xs_function(z_env, "z") #' z_range <- attr(z_var_spline, "variable_range") #' z_axis <- seq(z_range[1], z_range[2], length.out = 50) #' @@ -252,93 +315,35 @@ get_common_components_env <- function(formula_details, special_components_detail #' x_var_response <- pdp::partial(blackbox, "x") #' x_var_response_approx <- approx_with_spline(x_var_response, "yhat", "x") #' x_env <- list( -#' blackbox_response_obj = x_var_response, -#' blackbox_response_approx = x_var_response_approx +#' effect_outcome = x_var_response, +#' transition_outcome = x_var_response_approx #' ) -#' x_var_spline <- get_xs_call(x_env, "x") +#' x_var_spline <- build_xs_function(x_env, "x") #' x_range <- attr(x_var_spline, "variable_range") #' x_axis <- seq(x_range[1], x_range[2], length.out = 50) #' #' plot(x, y) #' lines(x_var_response) #' lines(x_axis, x_var_spline(x_axis)) -get_xs_call <- function(xs_env, pred_var_name) { - xs_approximation <- function(pred_var) { - data <- data.frame(pred_var) - names(data) <- pred_var_name - mgcv::predict.gam(xs_env$blackbox_response_approx, newdata = data) +build_xs_function <- function(quantitative_transition, predictor_name) { + xs_approximation <- function(predictor) { + data <- data.frame(predictor) + names(data) <- predictor_name + mgcv::predict.gam(quantitative_transition$transition_outcome, newdata = data) } - attr(xs_approximation, "variable_range") <- range(xs_env$blackbox_response_obj[[pred_var_name]]) + attr(xs_approximation, "variable_range") <- range(quantitative_transition$effect_outcome[[predictor_name]]) xs_approximation } -get_xf_call <- function(xf_env, pred_var_name) { - matched_factors <- xf_env$blackbox_response_transform +build_xf_function <- function(qualitative_transition, predictor_name) { + matched_factors <- qualitative_transition$transition_outcome if (length(unique(matched_factors$pred)) < 2) { - return(function(pred_var) pred_var) + return(function(predictor) predictor) } else { - function(pred_var) { - predictor_values <- data.frame(orig = pred_var) + function(predictor) { + predictor_values <- data.frame(orig = predictor) transformed_predictor <- dplyr::left_join(predictor_values, matched_factors) factor(transformed_predictor, levels = unique(matched_factors$pred)) } } } - -is_lm_better_than_approx <- function(data, response, predictor, approx_fun, compare_stat) { - approx_model_formula <- as.formula(sprintf("%s ~ approx_fun(%s)", response, predictor)) - approx_model <- lm(approx_model_formula, data) - lm_model_formula <- as.formula(sprintf("%s ~ %s", response, predictor)) - lm_model <- lm(lm_model_formula, data) - comparison <- compare_stat(approx_model) <= compare_stat(lm_model) - if (isTRUE(attr(compare_stat, "higher-better"))) { - comparison - } else { - !comparison - } -} - -correct_improved_components <- function(alter, compare_stat, xs, xf, special_components_details, data, response) { - - get_component_call <- function(special_component_details) { - if (special_component_details$call_fun == "xs") { - xs - } else { - xf - } - } - - use_untransformed <- function(special_component_details) { - call_fun <- special_component_details$call_fun - if (call_fun == "xs") { - alter_variable <- switch(alter$numeric, - always = FALSE, - auto = is_lm_better_than_approx(data, response, special_component_details$var, - get_component_call(special_component_details), compare_stat), - never = TRUE - ) - } else if (call_fun == "xf") { - alter_variable <- switch(alter$factor, - always = FALSE, - never = TRUE - ) - } else { - alter_variable <- TRUE - } - - if (is.null(alter_variable)) { - alter_variable <- TRUE - } - - alter_variable - } - - use_bare_call <- function(special_component_details) { - special_component_details$new_call <- special_component_details$var - special_component_details - } - - special_components_details %>% - purrr::map_if(use_untransformed, use_bare_call) - -} diff --git a/R/utils-formula-build.R b/R/utils-formula-build.R index 7959cc8..5be07af 100644 --- a/R/utils-formula-build.R +++ b/R/utils-formula-build.R @@ -1,4 +1,3 @@ -#' @export extract_formula_var_names <- function(formula, data) { formula_variables <- all.vars(formula) @@ -29,13 +28,12 @@ extract_formula_var_names <- function(formula, data) { formula_variables } -#' @export -get_formula_raw_components <- function(formula_terms) { +get_formula_single_components <- function(formula_terms) { as.character(attr(formula_terms,"variables"))[-c(1, 2)] } #' @export -get_formula_special <- function(variable_names, formula_terms, special, index = FALSE) { +get_special_predictors <- function(variable_names, formula_terms, special, index = FALSE) { if (index) { attr(formula_terms, "specials")[[special]] - 1 } else { @@ -43,8 +41,7 @@ get_formula_special <- function(variable_names, formula_terms, special, index = } } -#' @export -get_formula_details <- function(formula, variable_names) { +get_formula_metadata <- function(formula, variable_names) { if (!(length(formula) == 3)) { stop("Not specified response in formula") @@ -52,27 +49,26 @@ get_formula_details <- function(formula, variable_names) { formula_terms <- terms.formula(formula, specials = c("xs", "xf")) - formula_details <- list( + formula_metadata <- list( formula = formula, response = variable_names[1], predictors = variable_names[-1], lhs = get_formula_lhs(formula), rhs = get_formula_rhs(formula), - additive_components = get_formula_raw_components(formula_terms), - xs_variables = get_formula_special(variable_names, formula_terms, "xs"), - xf_variables = get_formula_special(variable_names, formula_terms, "xf"), - xs_variables_idx = get_formula_special(variable_names, formula_terms, "xs", TRUE), - xf_variables_idx = get_formula_special(variable_names, formula_terms, "xf", TRUE) + additive_components = get_formula_single_components(formula_terms), + xs_variables = get_special_predictors(variable_names, formula_terms, "xs"), + xf_variables = get_special_predictors(variable_names, formula_terms, "xf"), + xs_variables_idx = get_special_predictors(variable_names, formula_terms, "xs", TRUE), + xf_variables_idx = get_special_predictors(variable_names, formula_terms, "xf", TRUE) ) - if (length(formula_details$predictors) != length(formula_details$additive_components)) { + if (length(formula_metadata$predictors) != length(formula_metadata$additive_components)) { stop("Number of predictors is different than additive components") } - formula_details + formula_metadata } -#' @export -prepare_call <- function(component_string, add_variable = TRUE) { +clear_special_component <- function(component_string, add_variable = TRUE) { fun <- substr(component_string, 1, 2) if (!(fun %in% c("xs", "xf"))) { return(component_string) @@ -88,54 +84,60 @@ prepare_call <- function(component_string, add_variable = TRUE) { } -#' @export -get_component_params <- function(additive_component, env) { +get_component_params <- function(additive_component, xs_opts, xf_opts, env) { spline_params <- as.list(parse(text = additive_component[1])[[1]]) - transform_opts <- eval(spline_params$transform_opts, envir = env) - method_opts <- eval(spline_params$method_opts, envir = env) + transition <- eval(spline_params$transition, envir = env) + effect <- eval(spline_params$effect, envir = env) + transition_type <- substr(additive_component, 1, 2) + if (transition_type == "xs") { + transition <- match_parameters(transition, xs_opts$transition, xs_opts_default$transition) + effect <- match_parameters(effect, xs_opts$effect, xs_opts_default$effect) + } else { + transition <- match_parameters(transition, xf_opts$transition, xf_opts_default$transition) + effect <- match_parameters(effect, xf_opts$effect, xf_opts_default$effect) + } + list( - transform_opts = transform_opts, - method_opts = method_opts + transition = transition, + effect = effect ) } -#' @export -get_special_component_details <- function(raw_variable_name, additive_component_chr, env) { +get_special_component_metadata <- function(raw_variable_name, additive_component_chr, xs_opts, xf_opts, env) { - transformed_component <- prepare_call(additive_component_chr) - call_function <- prepare_call(additive_component_chr, FALSE) - component_params <- get_component_params(additive_component_chr, env) + transformed_component <- clear_special_component(additive_component_chr) + call_function <- clear_special_component(additive_component_chr, FALSE) + component_params <- get_component_params(additive_component_chr, xs_opts, xf_opts, env) component_details <- list( var = raw_variable_name, call = additive_component_chr, new_call = transformed_component, call_fun = call_function, - transform_opts = component_params$transform_opts, - method_opts = component_params$method_opts + transition = component_params$transition, + effect = component_params$effect ) component_details } -#' @export -get_special_components_info <- function(formula_details) { - special_predictor_indexes <- c(formula_details$xs_variables_idx, formula_details$xf_variables_idx) - special_predictor_names <- formula_details$predictors[special_predictor_indexes] - special_predictor_additive_components <- formula_details$additive_components[special_predictor_indexes] +collect_specials_metadata <- function(formula_metadata, xs_opts, xf_opts) { + special_predictor_indexes <- c(formula_metadata$xs_variables_idx, formula_metadata$xf_variables_idx) + special_predictor_names <- formula_metadata$predictors[special_predictor_indexes] + special_predictor_additive_components <- formula_metadata$additive_components[special_predictor_indexes] - special_component_details <- purrr::map2( + special_component_metadata <- purrr::map2( special_predictor_names, special_predictor_additive_components, - get_special_component_details, - env = attr(formula_details$formula, ".Environment") + get_special_component_metadata, + xs_opts = xs_opts, xf_opts = xf_opts, + env = attr(formula_metadata$formula, ".Environment") ) - names(special_component_details) <- special_predictor_names - special_component_details + names(special_component_metadata) <- special_predictor_names + special_component_metadata } -#' @export -transform_formula_chr <- function(formula_details, special_components_details) { +transform_formula_chr <- function(formula_metadata, special_components_details) { replace_component_call <- function(rhs_string_formula, component_details) { sub(component_details$call, component_details$new_call, rhs_string_formula, fixed = TRUE) @@ -144,82 +146,271 @@ transform_formula_chr <- function(formula_details, special_components_details) { transformed_rhs <- purrr::reduce( special_components_details, replace_component_call, - .init = formula_details$rhs + .init = formula_metadata$rhs ) - sprintf("%s ~ %s", formula_details$lhs, transformed_rhs) + sprintf("%s ~ %s", formula_metadata$lhs, transformed_rhs) } -#' @export -transformed_formula_object <- function(formula_details, blackbox, data, alter, compare_stat) { - - special_components_details <- get_special_components_info(formula_details) - transformed_formula_calls <- get_common_components_env(formula_details, special_components_details, blackbox, data) +correct_improved_components <- function(special_components_details, transformed_variables) { - xs_env_list <- transformed_formula_calls$xs_env - xs_call <- purrr::map2(xs_env_list, names(xs_env_list), get_xs_call) %>% - purrr::set_names(names(xs_env_list)) - xs <- function(variable) { - var_name <- deparse(substitute(variable)) - xs_call[[var_name]](variable) + use_untransformed <- function(special_component_metadata) { + !(special_component_metadata$var %in% transformed_variables) } - xf_env_list <- transformed_formula_calls$xf_env - xf_call <- purrr::map2(xf_env_list, names(xf_env_list), get_xf_call) %>% - purrr::set_names(names(xf_env_list)) - xf <- function(variable) { - var_name <- deparse(substitute(variable)) - xf_call[[var_name]](variable) + use_bare_call <- function(special_component_metadata) { + special_component_metadata$new_call <- special_component_metadata$var + special_component_metadata } - transformed_formula_env <- attr(formula_details$formula, ".Environment") - transformed_formula_env$xs <- xs - transformed_formula_env$xf <- xf - transformed_formula_env$xs_call <- xs_call - transformed_formula_env$xf_call <- xf_call - transformed_formula_env$xs_env_list <- xs_env_list - transformed_formula_env$xf_env_list <- xf_env_list - transformed_formula_env$response <- formula_details$response - special_components_details <- correct_improved_components( - alter, compare_stat, xs, xf, special_components_details, data, formula_details$lhs) - transformed_formula_string <- transform_formula_chr(formula_details, special_components_details) + special_components_details %>% + purrr::map_if(use_untransformed, use_bare_call) - as.formula(transformed_formula_string, env = transformed_formula_env) } -#' @export -factor_opts_default = list( - method_opts = list(type = "ice"), - transform_opts = list(stat = "GIC", value = 3) -) - -#' @export -numeric_opts_default = list( - method_opts = list(type = "pdp"), - transform_opts = list(k = 6) -) - -build_predictor_component <- function(predictor, class, factor_opts, numeric_opts) { +add_special_to_predictor <- function(predictor, class) { if (!(class %in% c("numeric", "integer", "factor"))) { stop("Wrong class passed.") } if (class == "factor") { - sprintf( - "xf(%s, method_opts = %s, transform_opts = %s)", - predictor, deparse(factor_opts$method_opts), deparse(factor_opts$transform_opts) - ) + sprintf("xf(%s)", predictor) } else { - sprintf( - "xs(%s, method_opts = %s, transform_opts = %s)", - predictor, deparse(numeric_opts$method_opts), deparse(numeric_opts$transform_opts) - ) + sprintf("xs(%s)", predictor) } } -build_formula <- function(response, predictors, classes, factor_opts, numeric_opts) { +build_predictor_based_formula <- function(response, predictors, classes, form = "additive") { + if (form == "additive") { + collapse = " + " + } else { + collapse = " * " + } rhs <- purrr::map2_chr( - predictors, classes, build_predictor_component, factor_opts = factor_opts, numeric_opts = numeric_opts) %>% - paste(collapse = " + ") + predictors, classes, add_special_to_predictor) %>% + paste(collapse = collapse) sprintf("%s ~ %s", response, rhs) } + +get_predictors_classes <- function(data) { + purrr::map_chr(1:ncol(data), function(x) class(data[, x])) +} + +try_get <- function(possible) { + possible_response <- try(possible, silent = TRUE) + if (class(possible_response) != "try-error") { + possible_response + } else { + NULL + } +} + +get_model_data <- function(model, data, env = parent.frame()) { + if (is.null(data)) { + data <- try_get(eval(stats::getCall(model)$data, envir = env)) + } + if (is.null(data)) { + stop("Data must be provided.") + } + data +} + +get_model_response <- function(model, data, response) { + if (is.null(response)) { + response <- try_get(all.vars(model$terms[[2]])) + } + if (!is.null(response)) { + response_in_data <- response %in% colnames(data) + if (!all(response_in_data)) { + stop("Response not found in data") + } + response <- response[response_in_data] + } else { + stop("Cannot extract model lhs") + } + response +} + +get_model_lhs <- function(model, lhs) { + if (is.null(lhs)) { + lhs <- try_get(deparse(model$terms[[2]])) + } + if (is.null(lhs)) { + lhs <- try_get(colnames(model.frame(model))[1]) + } + if (is.null(lhs)) { + stop("Cannot extract model lhs") + } + lhs +} + +get_model_predictors <- function(model, data, predictors, response) { + if (is.null(predictors)) { + predictors <- try_get(all.vars(model$terms[[3]])) + } + if (!is.null(predictors)) { + predictors_in_data <- predictors %in% colnames(data) + if (!all(predictors_in_data)) { + stop("Not all predictors found in data") + } + predictors <- predictors[predictors_in_data] + } else { + predictors <- setdiff(colnames(data), response) + } + predictors +} + +get_model_type <- function(model, data) { + response <- get_model_response(model, data, NULL) + + if (inherits(data[[response]], "factor")) { + type <- "classification" + } else if (inherits(data[[response]], "integer") && (length(unique(data[[response]])) <= 2)) { + type <- "classification" + } else { + type <- "regression" + } + type +} + +get_model_family <- function(model, family, type) { + + model_family <- try_get( + match.fun(eval(stats::getCall(model)$family)$family) + ) + + if (is.null(model_family)) { + if (type == "classification") { + model_family <- match.fun("binomial") + } else { + if (is.character(family)) { + model_family <- match.fun(family) + family_name <- family + } else { + model_family <- match.fun(family$family) + family_name <- family$family + } + message(sprintf("Cannot extract model family. Use %s.", family_name)) + } + } + + model_family +} + +get_model_link <- function(model, link, type) { + model_link <- try_get( + eval(stats::getCall(model)$family)$link + ) + + if (is.null(model_link)) { + if (type == "classification") { + model_link <- "logit" + } else { + if (is.null(model_link)) { + message(sprintf("Cannot extract model link. Use %s.", link)) + model_link <- link + } + } + } + + model_link +} + +get_formula_lhs <- function(formula) { + deparse(formula[[2]], width.cutoff = 500) +} + +get_formula_rhs <- function(formula) { + gsub("\\s+", " ", trimws(paste0(deparse(formula[[3]]), collapse = ""))) +} + +get_formula_predictors <- function(formula, data, predictors, response) { + if (is.null(predictors)) { + predictors <- all.vars(formula[[3]]) + } + if (!is.null(predictors)) { + predictors_in_data <- predictors %in% colnames(data) + if (!all(predictors_in_data)) { + stop("Not all predictors found in data") + } + predictors <- predictors[predictors_in_data] + } else { + predictors <- setdiff(colnames(data), response) + } + predictors +} + +get_formula_response <- function(formula, data, response) { + if (is.null(response)) { + response <- try_get(all.vars(formula[[2]])) + } + if (!is.null(response)) { + response_in_data <- response %in% colnames(data) + if (!all(response_in_data)) { + stop("Response not found in data") + } + response <- response[response_in_data] + } else { + stop("Cannot extract formula lhs") + } + response +} + +add_specials_to_formula <- function(formula_components, data, omit = c("xs", "xf", "+", "*", ":", "~")) { + if (length(formula_components) == 1 && !(as.character(formula_components)[1] %in% omit)) { + if (class(data[[as.character(formula_components)]]) == "factor") { + template <- quote(xf(var)) + } else { + template <- quote(xs(var)) + } + template[[2]] <- formula_components + formula_components <- template + formula_components <- formula_components + } else if (length(formula_components) >= 3) { + if (as.character(formula_components[[1]]) %in% omit[-c(1, 2)]) { + for (i in seq_along(formula_components)) { + formula_components[[i]] <- add_specials_to_formula(formula_components[[i]], data, omit) + } + } + } + formula_components +} + +transformed_formula_object <- function(formula_metadata, blackbox, data, family, xs_opts, xf_opts, compare_stat) { + + special_components_details <- collect_specials_metadata(formula_metadata, xs_opts, xf_opts) + transitions <- get_transitions_outcome(formula_metadata, special_components_details, blackbox, data, + family, compare_stat) + + quantitative_transitions <- transitions$quantitative %>% + purrr::keep(~ !(is.null(.$transition_outcome))) + xs_functions <- purrr::map2(quantitative_transitions, names(quantitative_transitions), build_xs_function) %>% + purrr::set_names(names(quantitative_transitions)) + xs <- function(variable) { + var_name <- deparse(substitute(variable)) + xs_functions[[var_name]](variable) + } + + qualitative_transitions <- transitions$qualitative %>% + purrr::keep(~ !(is.null(.$transition_outcome))) + xf_functions <- purrr::map2(qualitative_transitions, names(qualitative_transitions), build_xf_function) %>% + purrr::set_names(names(qualitative_transitions)) + xf <- function(variable) { + var_name <- deparse(substitute(variable)) + xf_functions[[var_name]](variable) + } + + transformed_formula_env <- attr(formula_metadata$formula, ".Environment") + transformed_formula_env$xs <- xs + transformed_formula_env$xf <- xf + transformed_formula_env$xs_functions <- xs_functions + transformed_formula_env$xf_functions <- xf_functions + transformed_formula_env$quantitative_transitions <- quantitative_transitions + transformed_formula_env$qualitative_transitions <- qualitative_transitions + transformed_formula_env$response <- formula_metadata$response + + variables_to_transform <- c(names(quantitative_transitions), names(qualitative_transitions)) + special_components_details <- correct_improved_components(special_components_details, variables_to_transform) + transformed_formula_string <- transform_formula_chr(formula_metadata, special_components_details) + as.formula(transformed_formula_string, env = transformed_formula_env) +} diff --git a/R/utils-model.R b/R/utils-model.R new file mode 100644 index 0000000..1dff9dd --- /dev/null +++ b/R/utils-model.R @@ -0,0 +1,19 @@ +#' @export +extract_xspliner_function <- function(object, ...) { + UseMethod("extract_xspliner_function", object) +} + +#' @export +extract_xspliner_function.xspliner <- function(model, predictor) { + quantity_transition_function <- environment(model)$xs_functions[[predictor]] + quality_transition_function <- environment(model)$xf_functions[[predictor]] + if (!is.null(quantity_transition_function)) { + return(quantity_transition_function) + } else if (!is.null(quantity_transition_function)) { + return(quantity_transition_function) + } else { + message("Variable is not transformed. Use identity.") + return(function(x) x) + } +} + diff --git a/R/utils.R b/R/utils.R index 27c2bcf..8fbf8a3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,163 +1,9 @@ -get_predictors_classes <- function(data) { - purrr::map_chr(1:ncol(data), function(x) class(data[, x])) -} - -try_get <- function(possible) { - possible_response <- try(possible, silent = TRUE) - if (class(possible_response) != "try-error") { - possible_response - } else { - NULL - } -} - -get_model_data <- function(model, data, env = parent.frame()) { - if (is.null(data)) { - data <- try_get(eval(stats::getCall(model)$data, envir = env)) - } - if (is.null(data)) { - stop("Data must be provided.") - } - data -} - -get_model_response <- function(model, data, response) { - if (is.null(response)) { - response <- try_get(all.vars(model$terms[[2]])) - } - if (!is.null(response)) { - response_in_data <- response %in% colnames(data) - if (!all(response_in_data)) { - stop("Response not found in data") - } - response <- response[response_in_data] - } else { - stop("Cannot extract model lhs") - } - response -} - -get_model_lhs <- function(model, lhs) { - if (is.null(lhs)) { - lhs <- try_get(deparse(model$terms[[2]])) - } - if (is.null(lhs)) { - lhs <- try_get(colnames(model.frame(model))[1]) - } - if (is.null(lhs)) { - stop("Cannot extract model lhs") - } - lhs -} - -get_model_predictors <- function(model, data, predictors, response) { - if (is.null(predictors)) { - predictors <- try_get(all.vars(model$terms[[3]])) - } - if (!is.null(predictors)) { - predictors_in_data <- predictors %in% colnames(data) - if (!all(predictors_in_data)) { - stop("Not all predictors found in data") - } - predictors <- predictors[predictors_in_data] - } else { - predictors <- setdiff(colnames(data), response) - } - predictors -} - -get_model_type <- function(model, data) { - response <- get_model_response(model, data, NULL) - - if (inherits(data[[response]], "factor")) { - type <- "classification" - } else if (inherits(data[[response]], "integer") && (length(unique(data[[response]])) <= 2)) { - type <- "classification" - } else { - type <- "regression" - } - type -} - -get_model_family <- function(model, family, type) { +match_parameters <- function(local, global, default) { + local_parameters <- names(local) + global_parameters <- setdiff(names(global), local_parameters) + default_parameters <- setdiff(setdiff(names(default), global_parameters), local_parameters) - model_family <- try_get( - match.fun(eval(stats::getCall(model)$family)$family) + append(local, + append(global[global_parameters], default[default_parameters]) ) - - if (is.null(model_family)) { - if (type == "classification") { - model_family <- match.fun("binomial") - } else { - if (is.character(family)) { - model_family <- match.fun(family) - family_name <- family - } else { - model_family <- match.fun(family$family) - family_name <- family$family - } - message(sprintf("Cannot extract model family. Use %s.", family_name)) - } - } - - model_family -} - -get_model_link <- function(model, link, type) { - model_link <- try_get( - eval(stats::getCall(model)$family)$link - ) - - if (is.null(model_link)) { - if (type == "classification") { - model_link <- "logit" - } else { - if (is.null(model_link)) { - message(sprintf("Cannot extract model link. Use %s.", link)) - model_link <- link - } - } - } - - model_link -} - -get_formula_lhs <- function(formula) { - deparse(formula[[2]], width.cutoff = 500) -} - -get_formula_rhs <- function(formula) { - gsub("\\s+", " ", trimws(paste0(deparse(formula[[3]]), collapse = ""))) -} - -get_formula_predictors <- function(formula, data, predictors, response) { - if (is.null(predictors)) { - predictors <- all.vars(formula[[3]]) - } - if (!is.null(predictors)) { - predictors_in_data <- predictors %in% colnames(data) - if (!all(predictors_in_data)) { - stop("Not all predictors found in data") - } - predictors <- predictors[predictors_in_data] - } else { - predictors <- setdiff(colnames(data), response) - } - predictors -} - -get_formula_response <- function(formula, data, response) { - if (is.null(response)) { - response <- try_get(all.vars(formula[[2]])) - } - if (!is.null(response)) { - response_in_data <- response %in% colnames(data) - if (!all(response_in_data)) { - stop("Response not found in data") - } - response <- response[response_in_data] - } else { - stop("Cannot extract formula lhs") - } - response } diff --git a/R/xspline.R b/R/xspline.R index ef9580e..8b95d34 100644 --- a/R/xspline.R +++ b/R/xspline.R @@ -12,40 +12,40 @@ xspline <- function(object, ...) { } #' @param model Predictive model. Basic model used for extracting predictors transformation. +#' @param lhs Left-hand side of model formula. Can be transformed response. #' @param response Name of response variable of \code{model}. #' @param predictors Predictor values that should be used in final model. #' @param data Training data of \code{model}. +#' @param form Can be 'additive' (default) or 'multiplicative'. Specifies formula form in final model. #' @param env Environment in which optional variables passed into parameters are stored. -#' @param factor_opts Formula parameters used for factor variable transformatoins inherited from factorMerger package. -#' @param numeric_opts Predictive model response method and approximation parameters used for numeric #' variables transformation. See vignette("xspliner") for details. #' #' @rdname xspline #' @export -xspline.default <- function(model, lhs = NULL, response = NULL, predictors = NULL, data = NULL, env = parent.frame(), - factor_opts = factor_opts_default, numeric_opts = numeric_opts_default, ...) { +xspline.default <- function(model, lhs = NULL, response = NULL, predictors = NULL, data = NULL, + form = "additive", env = parent.frame(), ...) { data <- get_model_data(model, data, env) lhs <- get_model_lhs(model, lhs) predictors <- get_model_predictors(model, data, predictors, get_model_response(model, data, response)) classes <- get_predictors_classes(data[, predictors]) formula <- as.formula( - build_formula(lhs, predictors, classes, factor_opts, numeric_opts), + build_predictor_based_formula(lhs, predictors, classes, form), env = env) build_xspliner(formula, model, data, env = env, ...) } #' @param formula xspliner-specific formula object. Check vignette("xspliner") for more details. -#' @param exact If TRUE, exact formula call is used. If not all formula variables are altered. +#' @param consider One of \code{c("specials", "all")}. If "specials", only components with xs or xf +#' call are considered in transition. #' @rdname xspline #' @export -xspline.formula <- function(formula, model, data = NULL, exact = TRUE, env = parent.frame(), ...) { +xspline.formula <- function(formula, model, data = NULL, consider = "specials", env = parent.frame(), ...) { data <- get_model_data(model, data, env) - formula_lhs <- get_formula_lhs(formula) model_lhs <- get_model_lhs(model, NULL) if (model_lhs != formula_lhs) { - warning("Model and formula lhs's must be the same. Using lhs from model.") + message("Model and formula lhs's must be the same. Using lhs from model.") formula[[2]] <- model$terms[[2]] } @@ -58,11 +58,11 @@ xspline.formula <- function(formula, model, data = NULL, exact = TRUE, env = par if (!(all(formula_predictors %in% model_predictors))) { stop("Not all variables from formula are included in model.") } - if (exact) { + if (consider == "specials") { build_xspliner(formula, model, data, env = env, ...) } else { - lhs <- get_formula_lhs(formula) - xspline.default(model, lhs, NULL, formula_predictors, data, env = env, ...) + formula[[3]] <- add_specials_to_formula(formula[[3]], data) + xspline.formula(formula, model, data = NULL, consider = "specials", env = env, ...) } } } @@ -79,45 +79,78 @@ xspline.explainer <- function(explainer, env = parent.frame(), ...) { #' @param formula xspliner-specific formula object. Check vignette("xspliner") for more details. #' @param model Predictive model. Basic model used for extracting predictors transformation. #' @param data Training data of \code{model}. +#' @param xf_opts Formula parameters used for factor variable transformatoins inherited from factorMerger package. +#' @param xs_opts Predictive model response method and approximation parameters used for quantitative. #' @param link Link function that should be used in final model. The passed is used when cannot be extracted from #' model. By default 'identity'. See \link{stats::family} for possibilities. #' @param family Family of response variable that should be used in final model. The passed is used when cannot #' be extracted from model. By default 'gaussian'. See \link{stats::family} for possibilities. #' @param env Environment in which optional variables passed into parameters are stored. -#' @param alter Specifies if each variable transformation should be compared with bare variable usage. -#' List of the form: list(numeric = type, factor = type), type one of c('always', 'auto', 'never). -#' 'auto' option available only for qualitative variable. The better model is specified by \code{compare_stat} parameter then. #' @param compare_stat Function of linear model (lm function output). Statistic that measures if linear model is better that transformed one. #' See \link{stats}. #' -build_xspliner <- function(formula, model, data, link = "identity", family = "gaussian", env = parent.frame(), - alter = list(numeric = 'always', factor = 'never'), compare_stat = r_squared) { +build_xspliner <- function(formula, model, data, xf_opts = xf_opts_default, xs_opts = xs_opts_default, + link = "identity", family = "gaussian", env = parent.frame(), compare_stat = aic) { formula_environment <- new.env(parent = env) attr(formula, ".Environment") <- formula_environment - formula_details <- get_formula_details(formula, extract_formula_var_names(formula, data)) - cleared_formula <- transformed_formula_object(formula_details, model, data, alter, compare_stat) + formula_metadata <- get_formula_metadata(formula, extract_formula_var_names(formula, data)) type <- get_model_type(model, data) model_family <- get_model_family(model, family, type) model_link <- get_model_link(model, link, type) - gam_model <- glm(cleared_formula, data = data, family = model_family(link = model_link)) - gam_model + family <- model_family(link = model_link) + cleared_formula <- transformed_formula_object(formula_metadata, model, data, family, xs_opts, xf_opts, compare_stat) + glm_model <- glm(cleared_formula, data = data, family = family) + environment(glm_model) <- attr(cleared_formula, ".Environment") + class(glm_model) <- c("xspliner", class(glm_model)) + glm_model } +#' Default parameters for transition methods +#' +#' While constructing formula interpreted by xspliner package, some parameters may be specified within xs(..) or xf(..) symbols. +#' Below are default parameters. See details in \code{vignette("xspliner")} +#' +#' @export +xf_opts_default = list( + effect = list(type = "fM"), + transition = list(alter = "newer", stat = "GIC", value = 3) +) + +#' @rdname xf_opts_default +#' @export +xs_opts_default = list( + effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not") +) + #' Statistics used for better linear model selection #' #' Used as \code{compare_stat} parameter in \code{xspline} method. #' Each function has attribute "higher-better". #' If "higher-better" is TRUE then model with higher statistic value is treated as better one. #' -#' @param lm_model Linear model - \code{lm} function output. +#' @param glm_model Linear model - \code{glm} function output. +#' @param family Family used for fitting the model. #' @name stats NULL -#' Calculate R Squared for linear model. +#' Calculate AIC for glm model. #' #' @rdname stats #' @export -r_squared <- function(lm_model) { - summary(lm_model)$r.squared +aic <- function(glm_model) { + summary(glm_model)$aic +} +attr(aic, "higher-better") <- FALSE + +#' Calculate Hosmer-Lemeshow Goodness of Fit for glm model. +#' +#' @rdname stats +#' @export +hoslem <- function(glm_model) { + if (glm_model$family$family != "binomial") { + stop("Not classification model.") + } + ResourceSelection::hoslem.test(glm_model$model[, 1], fitted(glm_model))$statistic } -attr(r_squared, "higher-better") <- TRUE +attr(hoslem, "higher-better") <- TRUE diff --git a/docs/articles/xspliner.html b/docs/articles/xspliner.html index b399ebb..cfbb36a 100644 --- a/docs/articles/xspliner.html +++ b/docs/articles/xspliner.html @@ -61,7 +61,7 @@

xspliner package - basic theory and usage

Krystian Igras

-

2018-11-25

+

2018-11-30

@@ -151,7 +151,7 @@

Specifying which method should we use to build response function

-
method_opts = list(
+
effect = list(
   type = <method_type> # "pdp" or "ale",
   ... # named list - other parameters passed for chosen method
 )
@@ -170,8 +170,8 @@

Specifying spline approximation parameters

Response function is approximated with mgcv::gam package and mgcv::s smoothing function.

xspliner allows using all smoothing methods provided by mgcv::s. You can pass corresponding parameters in

-
transform_opts = <mgcv::s parameters> # named list
-

Remark One of special parameters passed for transform_opts is increasing. When the parameter occurs then:

+
transition = <mgcv::s parameters> # named list
+

Remark One of special parameters passed for transition is increasing. When the parameter occurs then:

  • for TRUE value, approximation will be increasing
  • for FALSE value, approximation will be decreasing
    @@ -185,12 +185,12 @@

    yhat - response function values on points specified in the first column

As we want to store 100 values for response function data, we specify in formula:

-
cmedv ~ rm + lstat + xs(nox, method_opts = list(type = "pdp", grid.resolution = 100))
+
cmedv ~ rm + lstat + xs(nox, effect = list(type = "pdp", grid.resolution = 100))

We also want to approximate response function with cubic splines and basis dimension equal to 10. As we can see in mgcv::s documentation, we need to set: k = 10 and bs = "cr".

So we get the final formula:

cmedv ~ rm + lstat + xs(nox, 
-  method_opts = list(type = "pdp", grid.resolution = 100),
-  transform_opts = list(k = 10, bs = "cr"))
+ effect = list(type = "pdp", grid.resolution = 100), + transition = list(k = 10, bs = "cr"))
@@ -204,21 +204,23 @@

xp_model <- xspline(
   cmedv ~ rm + lstat +
     xs(nox, 
-       method_opts = list(type = "pdp", grid.resolution = 100), 
-       transform_opts = list(k = 10, bs = "cr")),
+       effect = list(type = "pdp", grid.resolution = 100), 
+       transition = list(k = 10, bs = "cr")),
   model = boston_rf
-)
+) +#> Cannot extract model family. Use gaussian. +#> Cannot extract model link. Use identity.

Lets check the model summary:

summary(xp_model)
 #> 
-#> Family: gaussian 
-#> Link function: identity 
+#> Call:
+#> glm(formula = cleared_formula, family = family, data = data)
 #> 
-#> Formula:
-#> cmedv ~ rm + lstat + xs(nox)
-#> <environment: 0x556a5185c8b0>
+#> Deviance Residuals: 
+#>      Min        1Q    Median        3Q       Max  
+#> -13.3754   -3.3141   -0.9339    2.0499   27.1722  
 #> 
-#> Parametric coefficients:
+#> Coefficients:
 #>              Estimate Std. Error t value Pr(>|t|)    
 #> (Intercept) -32.40688    4.84433  -6.690 5.98e-11 ***
 #> rm            5.26616    0.41581  12.665  < 2e-16 ***
@@ -227,9 +229,13 @@ 

#> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> -#> -#> R-sq.(adj) = 0.682 Deviance explained = 68.4% -#> GCV = 26.984 Scale est. = 26.771 n = 506

+#> (Dispersion parameter for gaussian family taken to be 26.77072) +#> +#> Null deviance: 42578 on 505 degrees of freedom +#> Residual deviance: 13439 on 502 degrees of freedom +#> AIC: 3105.3 +#> +#> Number of Fisher Scoring iterations: 2

As you can see in the summary, the final formula is simplified version of input one. All the information about the response functions are stored in xp_model environment.

@@ -252,25 +258,25 @@

boston_rf <- randomForest(cmedv ~ lstat + ptratio + age, data = boston) model_pdp_auto <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "pdp", grid.resolution = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "pdp", grid.resolution = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "pdp", grid.resolution = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston_rf, - alter = list(numeric = 'auto', factor = 'never') + xs_opts = list(transition = list(alter = "auto")) ) # r_squared statistic is used by default summary(model_pdp_auto) #> -#> Family: gaussian -#> Link function: identity +#> Call: +#> glm(formula = cleared_formula, family = family, data = data) #> -#> Formula: -#> cmedv ~ xs(lstat) + ptratio + age -#> <environment: 0x556a554fb0b8> +#> Deviance Residuals: +#> Min 1Q Median 3Q Max +#> -12.7619 -3.2031 -0.6366 2.8787 26.9953 #> -#> Parametric coefficients: +#> Coefficients: #> Estimate Std. Error t value Pr(>|t|) #> (Intercept) 4.638819 2.952287 1.571 0.117 #> xs(lstat) 1.248040 0.048545 25.709 < 2e-16 *** @@ -279,9 +285,13 @@

#> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> -#> -#> R-sq.(adj) = 0.705 Deviance explained = 70.7% -#> GCV = 25.075 Scale est. = 24.877 n = 506

+#> (Dispersion parameter for gaussian family taken to be 24.87722) +#> +#> Null deviance: 42578 on 505 degrees of freedom +#> Residual deviance: 12488 on 502 degrees of freedom +#> AIC: 3068.2 +#> +#> Number of Fisher Scoring iterations: 2

Linear approximation was better for ptratio response function.

diff --git a/docs/reference/approx_with_monotonic_spline.html b/docs/reference/approx_with_monotonic_spline.html index b650931..430d1e9 100644 --- a/docs/reference/approx_with_monotonic_spline.html +++ b/docs/reference/approx_with_monotonic_spline.html @@ -89,34 +89,35 @@

Approximate monotonic spline on data

It aproximates data with monotonic spline function by fitting GAM model.

-
approx_with_monotonic_spline(bb_response_data, response_var, pred_var,
-  env = parent.frame(), increasing, ...)
+
approx_with_monotonic_spline(effect_data, response, predictor,
+  env = parent.frame(), monotonic, ...)

Arguments

- - + + - - + + - - + + - +
response_var

Name of response value from bb_response_data.

response

Name of response value from effect_data.

pred_var

Name of predictor value from bb_response_data.

predictor

Name of predictor value from effect_data.

env

Formula environment that should be used for fitting gam model.

increasing

If TRUE, spline approximation is increasing, if FALSE decreasing.

monotonic

Possible options "up", "down" and "auto. If up the spline is incresing, when down decreasing. +For auto the better one (based on r.squared statistic) is chosen.

...

Other arguments passed to mgcv::s function.

bb_response_data.effect_data.

Blackbox response data, for example pdp curve.

@@ -130,7 +131,7 @@

Examp
x <- sort(rnorm(20, 5, 5)) y <- rnorm(20, 2, 2) env <- new.env() -approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, TRUE)
#> Error in approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, TRUE): could not find function "approx_with_monotonic_spline"
+approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, "up")
#> Error in approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, "up"): could not find function "approx_with_monotonic_spline"
+ + + + + + + + + diff --git a/docs/reference/build_xspliner.html b/docs/reference/build_xspliner.html index e34210b..939834f 100644 --- a/docs/reference/build_xspliner.html +++ b/docs/reference/build_xspliner.html @@ -89,9 +89,9 @@

Helper function for building GLM object with transformed variables.

Helper function for building GLM object with transformed variables.

-
build_xspliner(formula, model, data, env = parent.frame(),
-  alter = list(numeric = "always", factor = "never"),
-  compare_stat = r_squared)
+
build_xspliner(formula, model, data, xf_opts = xf_opts_default,
+  xs_opts = xs_opts_default, link = "identity", family = "gaussian",
+  env = parent.frame(), compare_stat = aic)

Arguments

@@ -109,14 +109,26 @@

Ar

- - + + + + + + + + + + + + + + - - + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 2d6ec77..75e730f 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -118,15 +118,15 @@

build_xspliner()

+

build_xs_function()

-

+ - + + + + + + + + + - - + + - - + + - - + + - - + + + + + diff --git a/examples/auto_choose_approx.R b/examples/auto_choose_approx.R index adf386b..5e6c393 100644 --- a/examples/auto_choose_approx.R +++ b/examples/auto_choose_approx.R @@ -8,8 +8,8 @@ boston.rf <- randomForest(cmedv ~ lstat + ptratio + age, data = boston) model_pdp <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "pdp", grid.resolution = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "pdp", grid.resolution = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "pdp", grid.resolution = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston.rf, data = boston @@ -19,12 +19,12 @@ model_pdp <- xspline( # also choose automaticaly whether use bare or approximation model_pdp_auto <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "pdp", grid.resolution = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "pdp", grid.resolution = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "pdp", grid.resolution = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston.rf, data = boston, - alter = list(numeric = 'auto', factor = 'never') + alter = list(quantitative = 'consider_lm', quantitative = 'never') ) summary(model_pdp) diff --git a/examples/compare_with_gam.R b/examples/compare_with_gam.R index a454e94..d5547f9 100644 --- a/examples/compare_with_gam.R +++ b/examples/compare_with_gam.R @@ -1,5 +1,6 @@ library(randomForest) library(pdp) +library(xspliner) data(boston) set.seed(101) @@ -9,8 +10,8 @@ boston.rf <- randomForest(cmedv ~ lstat + ptratio + age, data = boston) # build xspliner model with specified response method and approximation options model_pdp <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "pdp", grid.resolution = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "pdp", grid.resolution = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "pdp", grid.resolution = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston.rf, data = boston @@ -18,8 +19,8 @@ model_pdp <- xspline( model_ale <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "ale", K = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "ale", K = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "ale", K = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "ale", K = 40)) + age, model = boston.rf, data = boston diff --git a/examples/formula_transformation.R b/examples/formula_transformation.R index d14ed78..100364c 100644 --- a/examples/formula_transformation.R +++ b/examples/formula_transformation.R @@ -1,10 +1,10 @@ type <- "pdp" formula <- log(y) ~ - xs(x, method_opts = list(type = type)) * log(z) + xf(t) + w ^ 2 + log(a) + sqrt(exp(b)) + xs(d) + xs(x, effect = list(type = type)) * log(z) + xf(t) + w ^ 2 + log(a) + sqrt(exp(b)) + xs(d) data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = runif(10), w = rexp(10), a = rnorm(10), b = rnorm(10), d = rnorm(10)) variable_names <- extract_formula_var_names(formula, data) -formula_details <- get_formula_details(formula, variable_names) -special_components_details <- get_special_components_info(formula_details) -transform_formula_chr(formula_details, special_components_details) +formula_metadata <- get_formula_metadata(formula, variable_names) +special_components_details <- collect_specials_metadata(formula_metadata) +transform_formula_chr(formula_metadata, special_components_details) diff --git a/examples/inherit_parameters.R b/examples/inherit_parameters.R index 28b1eda..1112e57 100644 --- a/examples/inherit_parameters.R +++ b/examples/inherit_parameters.R @@ -4,10 +4,10 @@ oko <- 10 # construct formula formula <- log(y) ~ - xs(x, transform_opts = list(k = 6), method_opts = list(type = "pdp", object = oko)) * z + xf(t) + w ^ 2 + xs(x, effect = list(k = 6), method_opts = list(type = "pdp", object = oko)) * z + xf(t) + w ^ 2 # check if library functions inherits variables from parent.frame() formula_var_names <- extract_formula_var_names(formula, data) -formula_details <- get_formula_details(formula, formula_var_names) -additive_components_details <- get_special_components_info(formula_details) -transform_formula_chr(formula_details, additive_components_details) +formula_metadata <- get_formula_metadata(formula, formula_var_names) +additive_components_details <- collect_specials_metadata(formula_metadata) +transform_formula_chr(formula_metadata, additive_components_details) diff --git a/examples/link_examples.R b/examples/link_examples.R index c462d78..d59a6c6 100644 --- a/examples/link_examples.R +++ b/examples/link_examples.R @@ -45,8 +45,8 @@ plot(model, "crim") # and bare formula model <- xspline( - log(ptratio) ~ xs(cmedv, method_opts = list(type = "pdp"), spline_opts = list(k = 10)) + - xs(crim, method_opts = list(type = "pdp"), spline_opts = list(k = 10)), + log(ptratio) ~ xs(cmedv, effect = list(type = "pdp"), spline_opts = list(k = 10)) + + xs(crim, effect = list(type = "pdp"), spline_opts = list(k = 10)), model = model_rf, data = boston) plot(model, "cmedv") @@ -78,7 +78,7 @@ glm(chas ~ cmedv + crim, data = boston, family = binomial()) %>% summary glm(chas ~ cmedv + crim, data = boston, family = binomial()) %>% summary model_xs <- xspline(model_rf) -environment(model_xs)$xs_env_list[["cmedv"]] +environment(model_xs)$quantitative_transitions[["cmedv"]] plot(model_xs, "cmedv") plot(model_xs, "crim") diff --git a/examples/monotonic_spline.R b/examples/monotonic_spline.R index 36b56b8..2dfb345 100644 --- a/examples/monotonic_spline.R +++ b/examples/monotonic_spline.R @@ -10,11 +10,11 @@ boston.rf <- randomForest(cmedv ~ lstat + ptratio + age, data = boston) model_pdp <- xspline( cmedv ~ xs(lstat, - transform_opts = list(k = 6, increasing = FALSE), - method_opts = list(type = "pdp", grid.resolution = 60)) + + transition = list(k = 6, increasing = FALSE), + effect = list(type = "pdp", grid.resolution = 60)) + xs(ptratio, - transform_opts = list(k = 4, increasing = FALSE), - method_opts = list(type = "pdp", grid.resolution = 40)) + + transition = list(k = 4, increasing = FALSE), + effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston.rf, data = boston diff --git a/examples/params_test.R b/examples/params_test.R new file mode 100644 index 0000000..e69de29 diff --git a/examples/simple_graphics.R b/examples/simple_graphics.R index e4a5cb1..bab46a9 100644 --- a/examples/simple_graphics.R +++ b/examples/simple_graphics.R @@ -9,7 +9,7 @@ boston.rf <- randomForest(cmedv ~ rm + lstat + nox, data = boston) # build xspliner model with specified response method and approximation options model_pdp <- xspline( cmedv ~ rm + lstat + - xs(nox, method_opts = list(type = "pdp", grid.resolution = 100), transform_opts = list(k = 10, bs = "cr")), + xs(nox, effect = list(type = "pdp", grid.resolution = 100), transition = list(k = 10, bs = "cr")), model = boston.rf ) diff --git a/examples/xspline_objects_passing.R b/examples/xspline_objects_passing.R index 08ec8f0..ad91ba3 100644 --- a/examples/xspline_objects_passing.R +++ b/examples/xspline_objects_passing.R @@ -11,8 +11,20 @@ boston.rf <- randomForest(cmedv ~ lstat + ptratio + age, data = boston) model <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "pdp", grid.resolution = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "pdp", grid.resolution = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "pdp", grid.resolution = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + + age, + model = boston.rf, + data = boston +) +summary(model) + +# when xs has no parameters default one are taken + +model <- xspline( + cmedv ~ + xs(lstat) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston.rf, data = boston @@ -25,7 +37,17 @@ model <- xspline( cmedv ~ lstat + age, model = boston.rf, data = boston, - exact = FALSE + consider = "all" +) +summary(model) + +# we can consider whereter to use linear or xs transformation automatically +model <- xspline( + cmedv ~ lstat + ptratio + age, + model = boston.rf, + data = boston, + consider = "all", + xs_opts = list(transition = list(alter = "auto")) ) summary(model) @@ -40,8 +62,18 @@ summary(model) model <- xspline( cmedv ~ ., model = boston.rf, - numeric_opts = list(method_opts = list(type = "pdp", grid.resolution = 40), transform_opts = list(k = 4)), - alter = list(numeric = 'auto') + xs_opts = list( + effect = list(type = "pdp", grid.resolution = 40), + transition = list(k = 4, alter = "auto")) +) +summary(model) + +model <- xspline( + cmedv ~ ., + model = boston.rf, + xs_opts = list( + effect = list(type = "pdp"), + transition = list(alter = "auto")) ) summary(model) diff --git a/man/approx_with_monotonic_spline.Rd b/man/approx_with_monotonic_spline.Rd index 43a7cb7..baf8284 100644 --- a/man/approx_with_monotonic_spline.Rd +++ b/man/approx_with_monotonic_spline.Rd @@ -4,21 +4,22 @@ \alias{approx_with_monotonic_spline} \title{Approximate monotonic spline on data} \usage{ -approx_with_monotonic_spline(bb_response_data, response_var, pred_var, - env = parent.frame(), increasing, ...) +approx_with_monotonic_spline(effect_data, response, predictor, + env = parent.frame(), monotonic, ...) } \arguments{ -\item{response_var}{Name of response value from bb_response_data.} +\item{response}{Name of response value from effect_data.} -\item{pred_var}{Name of predictor value from bb_response_data.} +\item{predictor}{Name of predictor value from effect_data.} \item{env}{Formula environment that should be used for fitting gam model.} -\item{increasing}{If TRUE, spline approximation is increasing, if FALSE decreasing.} +\item{monotonic}{Possible options "up", "down" and "auto. If up the spline is incresing, when down decreasing. +For auto the better one (based on r.squared statistic) is chosen.} \item{...}{Other arguments passed to \link{mgcv::s} function.} -\item{bb_response_data.}{Blackbox response data, for example pdp curve.} +\item{effect_data.}{Blackbox response data, for example pdp curve.} } \value{ Object of class "gam". See \link{mgcv::gamObject} @@ -30,5 +31,5 @@ It aproximates data with monotonic spline function by fitting GAM model. x <- sort(rnorm(20, 5, 5)) y <- rnorm(20, 2, 2) env <- new.env() -approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, TRUE) +approx_with_monotonic_spline(data.frame(x = x, y = y), "y", "x", env, "up") } diff --git a/man/approx_with_spline.Rd b/man/approx_with_spline.Rd index 60e7315..ccc81da 100644 --- a/man/approx_with_spline.Rd +++ b/man/approx_with_spline.Rd @@ -4,15 +4,15 @@ \alias{approx_with_spline} \title{Approximate spline on data} \usage{ -approx_with_spline(bb_response_data, response_var, pred_var, +approx_with_spline(effect_data, response, predictor, env = parent.frame(), ...) } \arguments{ -\item{bb_response_data}{Blackbox response data, for example pdp curve.} +\item{effect_data}{Blackbox response data, for example pdp curve.} -\item{response_var}{Name of response value from bb_response_data.} +\item{response}{Name of response value from effect_data.} -\item{pred_var}{Name of predictor value from bb_response_data.} +\item{predictor}{Name of predictor value from effect_data.} \item{env}{Formula environment that should be used for fitting gam model.} diff --git a/man/get_xs_call.Rd b/man/build_xs_function.Rd similarity index 77% rename from man/get_xs_call.Rd rename to man/build_xs_function.Rd index 2fc8f27..8491517 100644 --- a/man/get_xs_call.Rd +++ b/man/build_xs_function.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils-approximations.R -\name{get_xs_call} -\alias{get_xs_call} +\name{build_xs_function} +\alias{build_xs_function} \title{It extracts fitted spline function by \link{xspline_approx_gam}.} \usage{ -get_xs_call(xs_env, pred_var_name) +build_xs_function(quantitative_transition, predictor_name) } \arguments{ \item{fitted_gam}{Fitted gam model approximating data.} @@ -28,10 +28,10 @@ blackbox <- randomForest::randomForest(y ~ ., data) z_var_response <- pdp::partial(blackbox, "z") z_var_response_approx <- approx_with_spline(z_var_response, "yhat", "z") z_env <- list( - blackbox_response_obj = z_var_response, - blackbox_response_approx = z_var_response_approx + effect_outcome = z_var_response, + transition_outcome = z_var_response_approx ) -z_var_spline <- get_xs_call(z_env, "z") +z_var_spline <- build_xs_function(z_env, "z") z_range <- attr(z_var_spline, "variable_range") z_axis <- seq(z_range[1], z_range[2], length.out = 50) @@ -42,10 +42,10 @@ lines(z_axis, z_var_spline(z_axis)) x_var_response <- pdp::partial(blackbox, "x") x_var_response_approx <- approx_with_spline(x_var_response, "yhat", "x") x_env <- list( - blackbox_response_obj = x_var_response, - blackbox_response_approx = x_var_response_approx + effect_outcome = x_var_response, + transition_outcome = x_var_response_approx ) -x_var_spline <- get_xs_call(x_env, "x") +x_var_spline <- build_xs_function(x_env, "x") x_range <- attr(x_var_spline, "variable_range") x_axis <- seq(x_range[1], x_range[2], length.out = 50) diff --git a/man/build_xspliner.Rd b/man/build_xspliner.Rd index 3fd5807..a8e4f59 100644 --- a/man/build_xspliner.Rd +++ b/man/build_xspliner.Rd @@ -4,9 +4,9 @@ \alias{build_xspliner} \title{Helper function for building GLM object with transformed variables.} \usage{ -build_xspliner(formula, model, data, link = "identity", - family = "gaussian", env = parent.frame(), alter = list(numeric = - "always", factor = "never"), compare_stat = r_squared) +build_xspliner(formula, model, data, xf_opts = xf_opts_default, + xs_opts = xs_opts_default, link = "identity", family = "gaussian", + env = parent.frame(), compare_stat = aic) } \arguments{ \item{formula}{xspliner-specific formula object. Check vignette("xspliner") for more details.} @@ -15,6 +15,10 @@ build_xspliner(formula, model, data, link = "identity", \item{data}{Training data of \code{model}.} +\item{xf_opts}{Formula parameters used for factor variable transformatoins inherited from factorMerger package.} + +\item{xs_opts}{Predictive model response method and approximation parameters used for quantitative.} + \item{link}{Link function that should be used in final model. The passed is used when cannot be extracted from model. By default 'identity'. See \link{stats::family} for possibilities.} @@ -23,10 +27,6 @@ be extracted from model. By default 'gaussian'. See \link{stats::family} for pos \item{env}{Environment in which optional variables passed into parameters are stored.} -\item{alter}{Specifies if each variable transformation should be compared with bare variable usage. -List of the form: list(numeric = type, factor = type), type one of c('always', 'auto', 'never). -'auto' option available only for qualitative variable. The better model is specified by \code{compare_stat} parameter then.} - \item{compare_stat}{Function of linear model (lm function output). Statistic that measures if linear model is better that transformed one. See \link{stats}.} } diff --git a/man/stats.Rd b/man/stats.Rd index 7f6657e..88ad031 100644 --- a/man/stats.Rd +++ b/man/stats.Rd @@ -2,13 +2,18 @@ % Please edit documentation in R/xspline.R \name{stats} \alias{stats} -\alias{r_squared} +\alias{aic} +\alias{hoslem} \title{Statistics used for better linear model selection} \usage{ -r_squared(lm_model) +aic(glm_model) + +hoslem(glm_model) } \arguments{ -\item{lm_model}{Linear model - \code{lm} function output.} +\item{glm_model}{Linear model - \code{glm} function output.} + +\item{family}{Family used for fitting the model.} } \description{ Used as \code{compare_stat} parameter in \code{xspline} method. diff --git a/man/xf_opts_default.Rd b/man/xf_opts_default.Rd new file mode 100644 index 0000000..9abe44c --- /dev/null +++ b/man/xf_opts_default.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xspline.R +\docType{data} +\name{xf_opts_default} +\alias{xf_opts_default} +\alias{xs_opts_default} +\title{Default parameters for transition methods} +\format{An object of class \code{list} of length 2.} +\usage{ +xf_opts_default + +xs_opts_default +} +\description{ +While constructing formula interpreted by xspliner package, some parameters may be specified within xs(..) or xf(..) symbols. +Below are default parameters. See details in \code{vignette("xspliner")} +} +\keyword{datasets} diff --git a/man/xspline.Rd b/man/xspline.Rd index f0dcb08..9f49ab9 100644 --- a/man/xspline.Rd +++ b/man/xspline.Rd @@ -10,13 +10,12 @@ xspline(object, ...) \method{xspline}{default}(model, lhs = NULL, response = NULL, - predictors = NULL, data = NULL, env = parent.frame(), - factor_opts = factor_opts_default, - numeric_opts = numeric_opts_default, ...) - -\method{xspline}{formula}(formula, model, data = NULL, exact = TRUE, + predictors = NULL, data = NULL, form = "additive", env = parent.frame(), ...) +\method{xspline}{formula}(formula, model, data = NULL, + consider = "specials", env = parent.frame(), ...) + \method{xspline}{explainer}(explainer, env = parent.frame(), ...) } \arguments{ @@ -26,22 +25,23 @@ xspline(object, ...) \item{model}{Predictive model. Basic model used for extracting predictors transformation.} +\item{lhs}{Left-hand side of model formula. Can be transformed response.} + \item{response}{Name of response variable of \code{model}.} \item{predictors}{Predictor values that should be used in final model.} \item{data}{Training data of \code{model}.} -\item{env}{Environment in which optional variables passed into parameters are stored.} - -\item{factor_opts}{Formula parameters used for factor variable transformatoins inherited from factorMerger package.} +\item{form}{Can be 'additive' (default) or 'multiplicative'. Specifies formula form in final model.} -\item{numeric_opts}{Predictive model response method and approximation parameters used for numeric +\item{env}{Environment in which optional variables passed into parameters are stored. variables transformation. See vignette("xspliner") for details.} \item{formula}{xspliner-specific formula object. Check vignette("xspliner") for more details.} -\item{exact}{If TRUE, exact formula call is used. If not all formula variables are altered.} +\item{consider}{One of \code{c("specials", "all")}. If "specials", only components with xs or xf +call are considered in transition.} \item{explainer}{Object of class 'explainer' (see \link{DALEX} package).} } diff --git a/tests/testthat/test-model.R b/tests/testthat/test-model.R index dba7178..906e418 100644 --- a/tests/testthat/test-model.R +++ b/tests/testthat/test-model.R @@ -1,7 +1,7 @@ context("test-model") test_that("xspline return object with correct attributes and values", { - formula <- y ~ xs(x, method_opts = list(type = "pdp")) * z + formula <- y ~ xs(x, effect = list(type = "pdp")) * z data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10)) blackbox <- randomForest::randomForest(y ~ ., data) model <- xspline(formula, blackbox, data) @@ -9,13 +9,13 @@ test_that("xspline return object with correct attributes and values", { expect_true("glm" %in% class(model)) expect_equal( ls(environment(model$formula)), - c("response", "xf", "xf_call", "xf_env_list", "xs", "xs_call", "xs_env_list")) + c("qualitative_transitions", "quantitative_transitions", "response", "xf", "xf_functions", "xs", "xs_functions")) expect_equal(deparse(model$formula), "y ~ xs(x) * z") }) test_that("xspline correctly predicts values", { set.seed(123) - formula <- y ~ xs(x, method_opts = list(type = "pdp")) * z + formula <- y ~ xs(x, effect = list(type = "pdp")) * z data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10)) blackbox <- randomForest::randomForest(y ~ ., data) model <- xspline(formula, blackbox, data) diff --git a/tests/testthat/test-utils-approximations.R b/tests/testthat/test-utils-approximations.R index 0ecf74e..d6c3015 100644 --- a/tests/testthat/test-utils-approximations.R +++ b/tests/testthat/test-utils-approximations.R @@ -2,12 +2,12 @@ context("test-utils-approximations") test_that("formula for spline approximation is prepared correctly", { env <- new.env() - expect_equal(get_spline_formula("y", "x", env), as.formula("y ~ s(x)", env = env)) + expect_equal(build_approximation_formula("y", "x", env), as.formula("y ~ s(x)", env = env)) - expect_equal(get_spline_formula("y", "x", env, k = 6), as.formula("y ~ s(x, k = 6)", env = env)) + expect_equal(build_approximation_formula("y", "x", env, k = 6), as.formula("y ~ s(x, k = 6)", env = env)) - params = list(pred_var = "x", response_var = "y", env = env, k = 6) - expect_equal(do.call(get_spline_formula, params), as.formula("y ~ s(x, k = 6)", env = env)) + params = list(predictor = "x", response = "y", env = env, k = 6) + expect_equal(do.call(build_approximation_formula, params), as.formula("y ~ s(x, k = 6)", env = env)) }) test_that("spline approximation is made correctly with mgcv::gam and mgcv::s", { @@ -15,22 +15,22 @@ test_that("spline approximation is made correctly with mgcv::gam and mgcv::s", { data <- data.frame(x = 1:10, y = 11:20) with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) as.formula("y ~ s(x)", env = env), + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x)", env = env), expect_true("gam" %in% class(approx_with_spline(data, "y", "x", env))) ) with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) as.formula("y ~ s(x)", env = env), + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x)", env = env), expect_equal(approx_with_spline(data, "y", "x", env)$formula, as.formula("y ~ s(x)", env = env)) ) with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) as.formula("y ~ s(x, k = 6)", env = env), + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x, k = 6)", env = env), expect_equal(approx_with_spline(data, "y", "x", env, k = 6)$smooth[[1]]$bs.dim, 6) ) with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) as.formula("y ~ s(x, k = 6)", env = env), + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x, k = 6)", env = env), expect_equal(approx_with_spline(data, "y", "x", env, k = 6)$formula, as.formula("y ~ s(x, k = 6)", env = env)) ) }) @@ -40,13 +40,13 @@ test_that("monotonic spline approximation is made correctly with mgcv::gam and m data <- data.frame(x = 1:10, y = sort(rnorm(10))) suppressWarnings({ with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x)", env = env), expect_true("gam" %in% class(approx_with_monotonic_spline(data, "y", "x", env, TRUE))) ) with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x)", env = env), expect_equal( approx_with_monotonic_spline(data, "y", "x", env, TRUE)$formula, @@ -54,13 +54,13 @@ test_that("monotonic spline approximation is made correctly with mgcv::gam and m ) with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x, k = 6)", env = env), expect_equal(approx_with_monotonic_spline(data, "y", "x", env, TRUE, k = 6)$smooth[[1]]$bs.dim, 6) ) with_mock( - get_spline_formula = function(response_var, pred_var, env, ...) + build_approximation_formula = function(response, predictor, env, ...) as.formula("y ~ s(x, k = 6)", env = env), expect_equal( approx_with_monotonic_spline(data, "y", "x", env, TRUE, k = 6)$formula, @@ -69,125 +69,137 @@ test_that("monotonic spline approximation is made correctly with mgcv::gam and m }) }) -test_that("prepare_spline_params_pdp correctly gets pdp response and its approximation", { - formula <- log(y) ~ xs(x, method_opts = list(type = "pdp")) * z + xf(t) + log(a) - formula_details <- get_formula_details(formula, c("y" ,"x", "z", "t", "a")) +test_that("prepare_transition_params_pdp correctly gets pdp response and its approximation", { + formula <- log(y) ~ xs(x, effect = list(type = "pdp")) * z + xf(t) + log(a) + formula_metadata <- get_formula_metadata(formula, c("y" ,"x", "z", "t", "a")) set.seed(123) data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = runif(10), a = rexp(10)) blackbox <- randomForest::randomForest(y ~ ., data) - special_components_details <- get_special_components_info(formula_details) + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(stat = "GIC", value = 3)) + special_components_details <- collect_specials_metadata(formula_metadata, xs_opts, xf_opts) - x_var_spline_params <- prepare_spline_params_pdp(formula_details, special_components_details$x, blackbox, data) + x_var_spline_params <- prepare_transition_params_pdp(formula_metadata, special_components_details$x, blackbox, data) - expect_equal(names(x_var_spline_params), c("bb_response_data", "pred_var", "response_var", "env")) - expect_true("partial" %in% class(x_var_spline_params$bb_response_data)) - expect_equal(colnames(x_var_spline_params$bb_response_data), c("x", "yhat")) + expect_equal(names(x_var_spline_params), c("alter", "monotonic", "effect_data", "predictor", "response", "env")) + expect_true("partial" %in% class(x_var_spline_params$effect_data)) + expect_equal(colnames(x_var_spline_params$effect_data), c("x", "yhat")) }) -test_that("prepare_spline_params_ale correctly gets ale response and its approximation", { - formula <- log(y) ~ xs(x, method_opts = list(type = "ale")) * z + xf(t) + log(a) - formula_details <- get_formula_details(formula, c("y" ,"x", "z", "t", "a")) +test_that("prepare_transition_params_ale correctly gets ale response and its approximation", { + formula <- log(y) ~ xs(x, effect = list(type = "ale")) * z + xf(t) + log(a) + formula_metadata <- get_formula_metadata(formula, c("y" ,"x", "z", "t", "a")) set.seed(123) data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = runif(10), a = rexp(10)) blackbox <- randomForest::randomForest(y ~ ., data) - special_components_details <- get_special_components_info(formula_details) + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(stat = "GIC", value = 3)) + special_components_details <- collect_specials_metadata(formula_metadata, xs_opts, xf_opts) - x_var_spline_params <- prepare_spline_params_ale(formula_details, special_components_details$x, blackbox, data) + x_var_spline_params <- prepare_transition_params_ale(formula_metadata, special_components_details$x, blackbox, data) - expect_equal(names(x_var_spline_params), c("bb_response_data", "pred_var", "response_var", "env")) - expect_equal("data.frame", class(x_var_spline_params$bb_response_data)) - expect_equal(colnames(x_var_spline_params$bb_response_data), c("x", "yhat")) + expect_equal(names(x_var_spline_params), c("alter", "monotonic", "effect_data", "predictor", "response", "env")) + expect_equal("data.frame", class(x_var_spline_params$effect_data)) + expect_equal(colnames(x_var_spline_params$effect_data), c("x", "yhat")) }) -test_that("numeric_component_env correctly gets response and its approximation", { - formula <- log(y) ~ xs(x, method_opts = list(type = "pdp")) * z + xf(t) + log(a) - formula_details <- get_formula_details(formula, c("y" ,"x", "z", "t", "a")) +test_that("get_quantitative_transition correctly gets response and its approximation", { + formula <- log(y) ~ xs(x, effect = list(type = "pdp")) * z + xf(t) + log(a) + formula_metadata <- get_formula_metadata(formula, c("y" ,"x", "z", "t", "a")) set.seed(123) data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = runif(10), a = rexp(10)) blackbox <- randomForest::randomForest(y ~ ., data) - special_components_details <- get_special_components_info(formula_details) + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(stat = "GIC", value = 3)) + special_components_details <- collect_specials_metadata(formula_metadata, xs_opts, xf_opts) - x_var_approx_env <- numeric_component_env(formula_details, special_components_details$x, blackbox, data) + x_var_approx_env <- get_quantitative_transition(formula_metadata, special_components_details$x, blackbox, data) - expect_equal(names(x_var_approx_env), c("blackbox_response_obj", "blackbox_response_approx")) - expect_true("partial" %in% class(x_var_approx_env$blackbox_response_obj)) - expect_true("gam" %in% class(x_var_approx_env$blackbox_response_approx)) - expect_equal(colnames(x_var_approx_env$blackbox_response_obj), c("x", "yhat")) + expect_equal(names(x_var_approx_env), c("effect_outcome", "transition_outcome")) + expect_true("partial" %in% class(x_var_approx_env$effect_outcome)) + expect_true("glm" %in% class(x_var_approx_env$transition_outcome)) + expect_equal(colnames(x_var_approx_env$effect_outcome), c("x", "yhat")) - expect_error(numeric_component_env(formula_details, special_components_details$t, blackbox, data)) + expect_error(get_quantitative_transition(formula_metadata, special_components_details$t, blackbox, data)) }) -test_that("get_common_components_env correctly gets response and its approximation for all special calls", { +test_that("get_transitions_outcome correctly gets response and its approximation for all special calls", { formula <- log(y) ~ - xs(x, method_opts = list(type = "pdp")) * z + xs(t, method_opts = list(type = "pdp", k = 6)) + log(a) - formula_details <- get_formula_details(formula, c("y" ,"x", "z", "t", "a")) + xs(x, effect = list(type = "pdp")) * z + xs(t, effect = list(type = "pdp", k = 6)) + log(a) + formula_metadata <- get_formula_metadata(formula, c("y" ,"x", "z", "t", "a")) set.seed(123) data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = runif(10), a = rexp(10)) blackbox <- randomForest::randomForest(y ~ ., data) - special_components_details <- get_special_components_info(formula_details) + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(stat = "GIC", value = 3)) + special_components_details <- collect_specials_metadata(formula_metadata, xs_opts, xf_opts) - vars_approx_env <- get_common_components_env(formula_details, special_components_details, blackbox, data) + vars_approx_env <- get_transitions_outcome(formula_metadata, special_components_details, blackbox, data) - expect_equal(names(vars_approx_env), c("xs_env", "xf_env")) - expect_equal(vars_approx_env$xf_env, list()) - expect_equal(names(vars_approx_env$xs_env$x), c("blackbox_response_obj", "blackbox_response_approx")) - expect_equal(names(vars_approx_env$xs_env$t), c("blackbox_response_obj", "blackbox_response_approx")) - expect_equal(colnames(vars_approx_env$xs_env$x$blackbox_response_obj), c("x", "yhat")) - expect_equal(colnames(vars_approx_env$xs_env$t$blackbox_response_obj), c("t", "yhat")) + expect_equal(names(vars_approx_env), c("quantitative", "qualitative")) + expect_equal(vars_approx_env$qualitative_transition, NULL) + expect_equal(names(vars_approx_env$quantitative$x), c("effect_outcome", "transition_outcome")) + expect_equal(names(vars_approx_env$quantitative$t), c("effect_outcome", "transition_outcome")) + expect_equal(colnames(vars_approx_env$quantitative$x$effect_outcome), c("x", "yhat")) + expect_equal(colnames(vars_approx_env$quantitative$t$effect_outcome), c("t", "yhat")) }) -test_that("get_xs_call correctly use gam object for prediction", { +test_that("build_xs_function correctly use gam object for prediction", { set.seed(123) x = rnorm(10) y = 2 * x + rnorm(10, 0, 0.001) data <- data.frame(x, y) - xs_env <- list() - xs_env$blackbox_response_approx <- mgcv::gam(y ~ x, data = data) - xs_env$blackbox_response_obj <- data.frame(x, y) - xs <- get_xs_call(xs_env, "x") + quantitative_transition <- list() + quantitative_transition$transition_outcome <- mgcv::gam(y ~ x, data = data) + quantitative_transition$effect_outcome <- data.frame(x, y) + xs <- build_xs_function(quantitative_transition, "x") expect_equivalent(round(xs(1)), 2) - expect_equal(attr(xs, "variable_range"), range(xs_env$blackbox_response_obj$x)) + expect_equal(attr(xs, "variable_range"), range(quantitative_transition$effect_outcome$x)) - expect_error(suppressWarnings(get_xs_call(xs_env, "t")(1))) + expect_error(suppressWarnings(build_xs_function(quantitative_transition, "t")(1))) }) # (todo) -# test_that("get_xf_call correctly use gam object for prediction", { +# test_that("build_xs_function correctly use gam object for prediction", { # }) -test_that("is_lm_better_than_approx correctly chooses model", { +test_that("is_lm_better_than_xs correctly chooses model", { x = 1:10 y = 2 * x + rnorm(10, 0, 0.001) data <- data.frame(x, y) approx_fun <- sin - expect_true(is_lm_better_than_approx(data, "y", "x", approx_fun, r_squared)) + expect_true(is_lm_better_than_xs(data, "y", "x", approx_fun, gaussian(), aic)) y = x ^ 2 + rnorm(10, 0, 0.001) data <- data.frame(x, y) approx_fun <- function(x) x ^ 2 - expect_false(is_lm_better_than_approx(data, "y", "x", approx_fun, r_squared)) + expect_false(is_lm_better_than_xs(data, "y", "x", approx_fun, gaussian(), aic)) }) test_that("correct_improved_components changes calls", { - formula <- y ~ xs(x, method_opts = list(type = "pdp")) + log(a) - formula_details <- get_formula_details(formula, c("y" ,"x", "a")) + formula <- y ~ xs(x, effect = list(type = "pdp")) + log(a) + formula_metadata <- get_formula_metadata(formula, c("y" ,"x", "a")) x = 1:10 data <- data.frame(y = 2 * x + rnorm(10, 0, 0.1), x, a = rexp(10)) - special_components_details <- get_special_components_info(formula_details) - xs <- sin - xf <- function(x) x - alter <- function(type) list(numeric = type) - expect_equal( - correct_improved_components(alter('always'), r_squared, xs, xf, special_components_details, data, "log(y)"), - special_components_details) - expect_equal( - correct_improved_components(alter('auto'), r_squared, xs, xf, special_components_details, data, "y")$x$new_call, - "x") + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(stat = "GIC", value = 3)) + special_components_details <- collect_specials_metadata(formula_metadata, xs_opts, xf_opts) - data <- data.frame(y = sin(x) + rnorm(10, 0, 0.1), x, a = rexp(10)) expect_equal( - correct_improved_components(alter('auto'), r_squared, xs, xf, special_components_details, data, "y"), + correct_improved_components(special_components_details, "x"), special_components_details) + expect_equal( + correct_improved_components(special_components_details, "y")$x$new_call, "x") }) diff --git a/tests/testthat/test-utils-formula-build.R b/tests/testthat/test-utils-formula-build.R index 9967a1b..dedc757 100644 --- a/tests/testthat/test-utils-formula-build.R +++ b/tests/testthat/test-utils-formula-build.R @@ -3,33 +3,33 @@ context("test-utils-formula-build") test_that("raw variable names correspond to additive components", { type <- "pdp" formula <- log(y) ~ - xs(x, method_opts = list(type = type)) * z + xf(t) + w ^ 2 + I(z ^ 2) + xs(x, effect = list(type = type)) * z + xf(t) + w ^ 2 + I(z ^ 2) data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = runif(10), w = rexp(10)) formula_terms <- terms.formula(formula, specials = c("xs", "xf")) # (todo) test shows that functionality doesn't work when variables are duplicated expect_equal( - get_formula_raw_components(formula_terms), - c("xs(x, method_opts = list(type = type))", "z", "xf(t)", "w", "I(z^2)") + get_formula_single_components(formula_terms), + c("xs(x, effect = list(type = type))", "z", "xf(t)", "w", "I(z^2)") ) data$a <- rnorm(10) data$b <- rnorm(10) formula_2 <- log(y) ~ - xs(x, method_opts = list(type = type)) * log(z) + xf(t) + w ^ 2 + log(a) + sqrt(exp(b)) + xs(x, effect = list(type = type)) * log(z) + xf(t) + w ^ 2 + log(a) + sqrt(exp(b)) formula_terms <- terms.formula(formula_2, specials = c("xs", "xf")) expect_equal( - get_formula_raw_components(formula_terms), - c("xs(x, method_opts = list(type = type))", "log(z)", "xf(t)", "w", "log(a)", "sqrt(exp(b))") + get_formula_single_components(formula_terms), + c("xs(x, effect = list(type = type))", "log(z)", "xf(t)", "w", "log(a)", "sqrt(exp(b))") ) }) test_that("formula variables are extracted properly", { data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = runif(10), w = rexp(10)) is_centered <- FALSE - formula_1 <- y ~ log(x) + xs(z, transform_opts = list(k = 6)) + formula_1 <- y ~ log(x) + xs(z, transition = list(k = 6)) formula_2 <- log(y) ~ - xs(x, transform_opts = list(k = 6), method_opts = list(type = "pdp", center = is_centered)) * z + xf(t) + w ^ 2 + xs(x, transition = list(k = 6), effect = list(type = "pdp", center = is_centered)) * z + xf(t) + w ^ 2 expect_equal(extract_formula_var_names(formula_1, data), c("y", "x", "z")) expect_equal(extract_formula_var_names(formula_2, data), c("y", "x", "z", "t", "w")) @@ -40,86 +40,90 @@ test_that("formula variables are extracted properly", { x <- rnorm(10) y <- x + rnorm(10, 0, 0.001) z <- x ^ 2 + rnorm(10, 0, 0.001) - formula_3 <- y ~ log(x, method_opts = list(type = type)) + xs(z, transform_opts = list(k = 6)) + formula_3 <- y ~ log(x, effect = list(type = type)) + xs(z, transition = list(k = 6)) expect_equal(suppressWarnings(extract_formula_var_names(formula_3)), c("y", "x", "z")) # (todo) should z be duplicated? type <- "pdp" formula_4 <- log(y) ~ - xs(x, method_opts = list(type = type)) * z + xf(t) + w ^ 2 + z ^ 2 + xs(x, effect = list(type = type)) * z + xf(t) + w ^ 2 + z ^ 2 expect_equal(extract_formula_var_names(formula_4, data), c("y", "x", "z", "t", "w")) }) -test_that("get_formula_details returns formula info in correct form", { - # get_formula_details depends on other functions -> use with_mock +test_that("get_formula_metadata returns formula info in correct form", { + # get_formula_metadata depends on other functions -> use with_mock is_centered <- FALSE formula <- log(y) ~ - xs(x, transform_opts = list(k = 6), method_opts = list(type = "pdp", center = is_centered)) * z + xf(t) + w ^ 2 + xs(x, transition = list(k = 6), effect = list(type = "pdp", center = is_centered)) * z + xf(t) + w ^ 2 variable_names <- c("y", "x", "z", "t", "w") - formula_details <- get_formula_details(formula, variable_names) - - expect_equal(formula_details$formula, formula) - expect_equal(formula_details$response, "y") - expect_equal(formula_details$predictors, c("x", "z", "t", "w")) - expect_equal(formula_details$lhs, "log(y)") - expect_equal(formula_details$xs_variables, "x") - expect_equal(formula_details$xf_variables, "t") + formula_metadata <- get_formula_metadata(formula, variable_names) + + expect_equal(formula_metadata$formula, formula) + expect_equal(formula_metadata$response, "y") + expect_equal(formula_metadata$predictors, c("x", "z", "t", "w")) + expect_equal(formula_metadata$lhs, "log(y)") + expect_equal(formula_metadata$xs_variables, "x") + expect_equal(formula_metadata$xf_variables, "t") expect_identical( - formula_details$additive_components, - c("xs(x, transform_opts = list(k = 6), method_opts = list(type = \"pdp\", center = is_centered))", + formula_metadata$additive_components, + c("xs(x, transition = list(k = 6), effect = list(type = \"pdp\", center = is_centered))", "z", "xf(t)", "w") ) }) -test_that("prepare_call transforms component into simple form", { - expect_equal(prepare_call("log(x)"), "log(x)") - expect_equal(prepare_call("x ^ 2"), "x ^ 2") - expect_equal(prepare_call("I(x ^ 2)"), "I(x ^ 2)") - expect_equal(prepare_call("xs(x)"), "xs(x)") - expect_equal(prepare_call("xf(x)"), "xf(x)") - expect_equal(prepare_call("xs(var_name)"), "xs(var_name)") - expect_equal(prepare_call("xs(x, method_opts = list(type = 'pdp'))"), "xs(x)") - expect_equal(prepare_call("xs(x ^ 2)"), "xs(x ^ 2)") - expect_equal(prepare_call("xs(x ^ 2, method_opts = list(type = 'pdp'))"), "xs(x ^ 2)") +test_that("clear_special_component transforms component into simple form", { + expect_equal(clear_special_component("log(x)"), "log(x)") + expect_equal(clear_special_component("x ^ 2"), "x ^ 2") + expect_equal(clear_special_component("I(x ^ 2)"), "I(x ^ 2)") + expect_equal(clear_special_component("xs(x)"), "xs(x)") + expect_equal(clear_special_component("xf(x)"), "xf(x)") + expect_equal(clear_special_component("xs(var_name)"), "xs(var_name)") + expect_equal(clear_special_component("xs(x, effect = list(type = 'pdp'))"), "xs(x)") + expect_equal(clear_special_component("xs(x ^ 2)"), "xs(x ^ 2)") + expect_equal(clear_special_component("xs(x ^ 2, effect = list(type = 'pdp'))"), "xs(x ^ 2)") }) test_that("formula component parameters are extracted correctly", { env_0 <- new.env() env_1 <- new.env() # parent.frame as parent env env_1$a <- 1 - env_1$type <- "pdp" + env_1$type <- "foo" env_2 <- as.environment(list(a = 1, type = "pdp")) # empty env as parent env additive_component_1 <- "xf(x)" - additive_component_2 <- "xs(x, method_opts = list(type = 'pdp'))" - additive_component_3 <- "xs(x, method_opts = list(type = type))" - additive_component_4 <- "xs(x, method_opts = list(type = type), transform_opts = list(k = a))" + additive_component_2 <- "xs(x, effect = list(type = 'ale'))" + additive_component_3 <- "xs(x, effect = list(type = type))" + additive_component_4 <- "xs(x, effect = list(type = type), transition = list(k = a))" + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(alter = "never", stat = "GIC", value = 3)) expect_identical( - get_component_params(additive_component_1, env_0), - list(transform_opts = NULL, method_opts = NULL) + get_component_params(additive_component_1, xs_opts, xf_opts, env_0), + list(transition = xf_opts$transition, effect = xf_opts$effect) ) expect_identical( - get_component_params(additive_component_2, env_0), - list(transform_opts = NULL, method_opts = list(type = "pdp")) + get_component_params(additive_component_2, xs_opts, xf_opts, env_0), + list(transition = xs_opts$transition, effect = list(type = "ale")) ) expect_error( - get_component_params(additive_component_3, env_0) + get_component_params(additive_component_3, xs_opts, xf_opts, env_0) ) expect_identical( - get_component_params(additive_component_3, env_1), - list(transform_opts = NULL, method_opts = list(type = "pdp")) + get_component_params(additive_component_3, xs_opts, xf_opts, env_1), + list(transition = xs_opts$transition, effect = list(type = "foo")) ) expect_identical( - get_component_params(additive_component_4, env_1), - list(transform_opts = list(k = 1), method_opts = list(type = "pdp")) + get_component_params(additive_component_4, xs_opts, xf_opts, env_1), + list(transition = list(k = 1, alter = "always", monotonic = "not"), effect = list(type = "foo")) ) expect_error( - get_component_params(additive_component_3, env_2) + get_component_params(additive_component_3, xs_opts, xf_opts, env_2) ) expect_error( - get_component_params(additive_component_4, env_2) + get_component_params(additive_component_4, xs_opts, xf_opts, env_2) ) }) @@ -128,98 +132,109 @@ test_that("additive component details are extracted and stored correctly", { env_0 <- new.env() env_1 <- new.env() env_1$a <- 1 - env_1$type <- "pdp" + env_1$type <- "foo" additive_component_1 <- "xf(x)" - additive_component_2 <- "xs(x, method_opts = list(type = 'pdp'))" - additive_component_3 <- "xs(x, method_opts = list(type = type))" - additive_component_4 <- "xs(x, method_opts = list(type = type), transform_opts = list(k = a))" + additive_component_2 <- "xs(x, effect = list(type = 'pdp'))" + additive_component_3 <- "xs(x, effect = list(type = type))" + additive_component_4 <- "xs(x, effect = list(type = type), transition = list(k = a))" + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(alter = "newer", stat = "GIC", value = 3)) expect_equal( - get_special_component_details("x", additive_component_1, env_0), + get_special_component_metadata("x", additive_component_1, xs_opts, xf_opts, env_0), list(var = "x", call = "xf(x)", new_call = "xf(x)", - call_fun = "xf", transform_opts = NULL, method_opts = NULL) + call_fun = "xf", transition = xf_opts$transition, effect = xf_opts$effect) ) expect_equal( - get_special_component_details("x", additive_component_2, env_0), - list(var = "x", call = "xs(x, method_opts = list(type = 'pdp'))", - new_call = "xs(x)", call_fun = "xs", transform_opts = NULL, method_opts = list(type = "pdp")) + get_special_component_metadata("x", additive_component_2, xs_opts, xf_opts, env_0), + list(var = "x", call = "xs(x, effect = list(type = 'pdp'))", + new_call = "xs(x)", call_fun = "xs", transition = xs_opts$transition, effect = list(type = "pdp")) ) expect_error( - get_special_component_details("x", additive_component_3, env_0) + get_special_component_metadata("x", additive_component_3, xs_opts, xf_opts, env_0) ) expect_equal( - get_special_component_details("x", additive_component_3, env_1), - list(var = "x", call = "xs(x, method_opts = list(type = type))", new_call = "xs(x)", - call_fun = "xs", transform_opts = NULL, method_opts = list(type = "pdp")) + get_special_component_metadata("x", additive_component_3, xs_opts, xf_opts, env_1), + list(var = "x", call = "xs(x, effect = list(type = type))", new_call = "xs(x)", + call_fun = "xs", transition = xs_opts$transition, effect = list(type = "foo")) ) expect_equal( - get_special_component_details("x", additive_component_4, env_1), - list(var = "x", call = "xs(x, method_opts = list(type = type), transform_opts = list(k = a))", new_call = "xs(x)", - call_fun = "xs", transform_opts = list(k = 1), method_opts = list(type = "pdp")) + get_special_component_metadata("x", additive_component_4, xs_opts, xf_opts, env_1), + list(var = "x", call = "xs(x, effect = list(type = type), transition = list(k = a))", new_call = "xs(x)", + call_fun = "xs", transition = list(k = 1, alter = "always", monotonic = "not"), effect = list(type = "foo")) ) }) -test_that("get_special_components_info properly identifies special components and extracts its details", { - formula_details <- list( - formula = log(y) ~ xs(x, method_opts = list(type = "type")) * z + xf(t) + log(a) + xs(d), +test_that("collect_specials_metadata properly identifies special components and extracts its details", { + formula_metadata <- list( + formula = log(y) ~ xs(x, effect = list(type = "type")) * z + xf(t) + log(a) + xs(d), response = "y", predictors = c("x", "z", "t", "a", "d"), lhs = "log(y)", - rhs = "xs(x, method_opts = list(type = \"type\")) * z + xf(t) + log(a) + xs(d)", - additive_components = c("xs(x, method_opts = list(type = \"type\"))", "z", "xf(t)", "log(a)", "xs(d)"), + rhs = "xs(x, effect = list(type = \"type\")) * z + xf(t) + log(a) + xs(d)", + additive_components = c("xs(x, effect = list(type = \"type\"))", "z", "xf(t)", "log(a)", "xs(d)"), xs_variables = c("x", "d"), xf_variables = "t", xs_variables_idx = c(1, 5), xf_variables_idx = 3) + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(stat = "GIC", value = 3)) - special_components_info <- get_special_components_info(formula_details) + special_components_info <- collect_specials_metadata(formula_metadata, xs_opts, xf_opts) expect_equal(names(special_components_info), c("x", "d", "t")) expect_equal( special_components_info$x, - list(var = "x", call = "xs(x, method_opts = list(type = \"type\"))", - new_call = "xs(x)", call_fun = "xs", transform_opts = NULL, method_opts = list(type = "type")) + list(var = "x", call = "xs(x, effect = list(type = \"type\"))", + new_call = "xs(x)", call_fun = "xs", transition = xs_opts$transition, effect = list(type = "type")) ) }) test_that("formula last string form is correct", { - formula_details <- list( - formula = log(y) ~ xs(x, method_opts = list(type = "type")) * z + xf(t) + log(a), + formula_metadata <- list( + formula = log(y) ~ xs(x, effect = list(type = "type")) * z + xf(t) + log(a), response = "y", predictors = c("x", "z", "t", "a"), lhs = "log(y)", - rhs = "xs(x, method_opts = list(type = \"type\")) * z + xf(t) + log(a)", - additive_components = c("xs(x, method_opts = list(type = \"type\"))", "z", "xf(t)", "log(a)"), + rhs = "xs(x, effect = list(type = \"type\")) * z + xf(t) + log(a)", + additive_components = c("xs(x, effect = list(type = \"type\"))", "z", "xf(t)", "log(a)"), xs_variables = "x", xf_variables = "t", xs_variables_idx = 1, xf_variables_idx = 3) special_component_info <- list( - x = list(var = "x", call = "xs(x, method_opts = list(type = \"type\"))", - new_call = "xs(x)", transform_opts = NULL, method_opts = list(type = "type")), + x = list(var = "x", call = "xs(x, effect = list(type = \"type\"))", + new_call = "xs(x)", transition = list(alter = "auto"), effect = list(type = "type")), t = list(var = "t", call = "xf(t)", - new_call = "xf(t)", transform_opts = NULL, method_opts = NULL)) + new_call = "xf(t)", transition = list(stat = "GIC"), effect = list(type = "fM"))) - expect_equal(transform_formula_chr(formula_details, special_component_info), "log(y) ~ xs(x) * z + xf(t) + log(a)") + expect_equal(transform_formula_chr(formula_metadata, special_component_info), "log(y) ~ xs(x) * z + xf(t) + log(a)") }) test_that("transformed_formula_object returns correct formula form and environment", { - formula <- log(y) ~ xs(x, method_opts = list(type = "pdp")) * z + xf(t, method_opts = list(type = "ice")) + log(a) - formula_details <- get_formula_details(formula, c("y" ,"x", "z", "t", "a")) + formula <- log(y) ~ xs(x, effect = list(type = "pdp"), transition = list(alter = "auto")) * z + + xf(t, effect = list(type = "fM")) + log(a) + formula_metadata <- get_formula_metadata(formula, c("y" ,"x", "z", "t", "a")) set.seed(123) data <- data.frame(y = rnorm(10, 2), x = rnorm(10), z = rnorm(10, 10), t = factor(sample(LETTERS[1:2], 10, replace = TRUE)), a = rexp(10)) - alter <- list(numeric = 'always', factor = 'never') blackbox <- randomForest::randomForest(y ~ ., data) + xs_opts <- list(effect = list(type = "pdp"), + transition = list(alter = "always", monotonic = "not")) + xf_opts <- list(effect = list(type = "fM"), + transition = list(alter = "never", stat = "GIC", value = 3)) - - transformed_formula <- transformed_formula_object(formula_details, blackbox, data, alter) + transformed_formula <- transformed_formula_object(formula_metadata, blackbox, data, gaussian(), xs_opts, xf_opts, aic) env <- attr(transformed_formula, ".Environment") expect_true(inherits(transformed_formula, "formula")) expect_true(all(c("xf", "xs") %in% ls(env))) expect_equal(length(env$xs), 1) expect_equal(length(env$xf), 1) - alter <- list(numeric = 'auto', factor = 'never') compare_stat <- function(lm_model) 1 attr(compare_stat, "higher-better") <- TRUE - transformed_formula <- transformed_formula_object(formula_details, blackbox, data, alter, compare_stat) + transformed_formula <- transformed_formula_object(formula_metadata, blackbox, data, gaussian(), + xs_opts, xf_opts, compare_stat) env <- attr(transformed_formula, ".Environment") expect_true(inherits(transformed_formula, "formula")) diff --git a/vignettes/xspliner.Rmd b/vignettes/xspliner.Rmd index b736868..7086af1 100644 --- a/vignettes/xspliner.Rmd +++ b/vignettes/xspliner.Rmd @@ -43,7 +43,7 @@ In each case, we get a single variable function, which should explain the impact The following pictures show the `ptration` impact on `cmedv` in some randomForest model based on Boston Housing Data. Below curves are obtained by the approach of the pdp and ale methods. -```{r include = FALSE} +```{r include = FALSE, message = FALSE} data(boston) set.seed(123) @@ -53,16 +53,16 @@ boston.rf <- randomForest(cmedv ~ lstat + ptratio + age, data = boston) # build xspline model with specified response method and approximation options model_pdp <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "pdp", grid.resolution = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "pdp", grid.resolution = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "pdp", grid.resolution = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston.rf ) model_ale <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "ale", K = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "ale", K = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "ale", K = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "ale", K = 40)) + age, model = boston.rf ) @@ -136,7 +136,7 @@ As the algoritm goes threw creating response function and its approximation we n #### Specifying which method should we use to build response function ``` -method_opts = list( +effect = list( type = # "pdp" or "ale", ... # named list - other parameters passed for chosen method ) @@ -153,10 +153,10 @@ Response function is approximated with `mgcv::gam` package and `mgcv::s` smoothi `xspliner` allows using all smoothing methods provided by `mgcv::s`. You can pass corresponding parameters in ``` -transform_opts = # named list +transition = # named list ``` **Remark** -One of special parameters passed for `transform_opts` is `increasing`. +One of special parameters passed for `transition` is `increasing`. When the parameter occurs then: - for `TRUE` value, approximation will be increasing @@ -171,7 +171,7 @@ Response function is presented as data.frame with two columns (in our case): As we want to store 100 values for response function data, we specify in formula: ``` -cmedv ~ rm + lstat + xs(nox, method_opts = list(type = "pdp", grid.resolution = 100)) +cmedv ~ rm + lstat + xs(nox, effect = list(type = "pdp", grid.resolution = 100)) ``` We also want to approximate response function with cubic splines and basis dimension equal to 10. @@ -180,8 +180,8 @@ As we can see in `mgcv::s` documentation, we need to set: `k = 10` and `bs = "cr So we get the final formula: ``` cmedv ~ rm + lstat + xs(nox, - method_opts = list(type = "pdp", grid.resolution = 100), - transform_opts = list(k = 10, bs = "cr")) + effect = list(type = "pdp", grid.resolution = 100), + transition = list(k = 10, bs = "cr")) ``` ### Building the model @@ -202,8 +202,8 @@ As we already have the black box provided, we can use `xspline` function to buil xp_model <- xspline( cmedv ~ rm + lstat + xs(nox, - method_opts = list(type = "pdp", grid.resolution = 100), - transform_opts = list(k = 10, bs = "cr")), + effect = list(type = "pdp", grid.resolution = 100), + transition = list(k = 10, bs = "cr")), model = boston_rf ) ``` @@ -238,16 +238,16 @@ You may find two parameters responsible for that: - `compare_stat` - function of `lm` class object. Defines statistic that should be used in decision between spline model and linear one. The function should have attribute `higher`. When the attribute has `"better"` value then the model with higher statistic value is chosen. You can see the feature in above example: -```{r} +```{r message = FALSE} set.seed(123) boston_rf <- randomForest(cmedv ~ lstat + ptratio + age, data = boston) model_pdp_auto <- xspline( cmedv ~ - xs(lstat, transform_opts = list(k = 6), method_opts = list(type = "pdp", grid.resolution = 60)) + - xs(ptratio, transform_opts = list(k = 4), method_opts = list(type = "pdp", grid.resolution = 40)) + + xs(lstat, transition = list(k = 6), effect = list(type = "pdp", grid.resolution = 60)) + + xs(ptratio, transition = list(k = 4), effect = list(type = "pdp", grid.resolution = 40)) + age, model = boston_rf, - alter = list(numeric = 'auto', factor = 'never') + xs_opts = list(transition = list(alter = "auto")) ) # r_squared statistic is used by default

Training data of model.

env

Environment in which optional variables passed into parameters are stored.

xf_opts

Formula parameters used for factor variable transformatoins inherited from factorMerger package.

xs_opts

Predictive model response method and approximation parameters used for quantitative.

link

Link function that should be used in final model. The passed is used when cannot be extracted from +model. By default 'identity'. See stats::family for possibilities.

family

Family of response variable that should be used in final model. The passed is used when cannot +be extracted from model. By default 'gaussian'. See stats::family for possibilities.

alter

Specifies if each variable transformation should be compared with bare variable usage. -List of the form: list(numeric = type, factor = type), type one of c('always', 'auto', 'never). -'auto' option available only for qualitative variable. The better model is specified by compare_stat parameter then.

env

Environment in which optional variables passed into parameters are stored.

compare_stat

Helper function for building GLM object with transformed variables.

It extracts fitted spline function by xspline_approx_gam.

-

get_xs_call()

+

build_xspliner()

It extracts fitted spline function by xspline_approx_gam.

Helper function for building GLM object with transformed variables.

@@ -142,9 +142,15 @@

r_squared()

+

aic() hoslem()

Statistics used for better linear model selection

+

xf_opts_default xs_opts_default

+

Default parameters for transition methods

diff --git a/docs/reference/stats.html b/docs/reference/stats.html index 7cfd58c..e701b03 100644 --- a/docs/reference/stats.html +++ b/docs/reference/stats.html @@ -91,14 +91,20 @@

Statistics used for better linear model selection

If "higher-better" is TRUE then model with higher statistic value is treated as better one.

-
r_squared(lm_model)
+
aic(glm_model)
+
+hoslem(glm_model)

Arguments

- - + + + + + +
lm_model

Linear model - lm function output.

glm_model

Linear model - glm function output.

family

Family used for fitting the model.

diff --git a/docs/reference/xf_opts_default.html b/docs/reference/xf_opts_default.html new file mode 100644 index 0000000..9123f62 --- /dev/null +++ b/docs/reference/xf_opts_default.html @@ -0,0 +1,127 @@ + + + + + + + + +Default parameters for transition methods — xf_opts_default • xspliner + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + + +

While constructing formula interpreted by xspliner package, some parameters may be specified within xs(..) or xf(..) symbols. +Below are default parameters. See details in vignette("xspliner")

+ + +
xf_opts_default
+
+xs_opts_default
+ +

Format

+ +

An object of class list of length 2.

+ + +
+ +
+ +
+ + +
+

Site built with pkgdown.

+
+ +
+
+ + + + diff --git a/docs/reference/xspline.html b/docs/reference/xspline.html index 04d05dd..80fe6f7 100644 --- a/docs/reference/xspline.html +++ b/docs/reference/xspline.html @@ -92,13 +92,13 @@

Builds predictive model based GLM.

xspline(object, ...)
 
 # S3 method for default
-xspline(model, response = NULL, data = NULL,
-  env = parent.frame(), factor_opts = factor_opts_default,
-  numeric_opts = numeric_opts_default, ...)
+xspline(model, lhs = NULL, response = NULL,
+  predictors = NULL, data = NULL, form = "additive",
+  env = parent.frame(), ...)
 
 # S3 method for formula
 xspline(formula, model, data = NULL,
-  env = parent.frame(), ...)
+  consider = "specials", env = parent.frame(), ...)
 
 # S3 method for explainer
 xspline(explainer, env = parent.frame(), ...)
@@ -118,31 +118,40 @@

Ar

model

Predictive model. Basic model used for extracting predictors transformation.

lhs

Left-hand side of model formula. Can be transformed response.

response

Name of response variable of model.

data

Training data of model.

predictors

Predictor values that should be used in final model.

env

Environment in which optional variables passed into parameters are stored.

data

Training data of model.

factor_opts

Formula parameters used for factor variable transformatoins inherited from factorMerger package.

form

Can be 'additive' (default) or 'multiplicative'. Specifies formula form in final model.

numeric_opts

Predictive model response method and approximation parameters used for numeric +

env

Environment in which optional variables passed into parameters are stored. variables transformation. See vignette("xspliner") for details.

formula

xspliner-specific formula object. Check vignette("xspliner") for more details.

consider

One of c("specials", "all"). If "specials", only components with xs or xf +call are considered in transition.

explainer

Object of class 'explainer' (see DALEX package).