Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scale plot with coeff #108

Merged
merged 2 commits into from
Sep 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: xspliner
Title: Assisted Model Building, using Surrogate Black-Box Models to Train Interpretable Spline Based Additive Models
Version: 0.0.3.9002
Version: 0.0.3.9003
Authors@R: c(
person("Krystian", "Igras", email = "[email protected]", role = c("aut", "cre")),
person("Przemyslaw", "Biecek", role = c("aut", "ths")))
Expand Down
5 changes: 3 additions & 2 deletions R/methods-xspliner.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,17 @@
#' @param compare_with Named list. Other models that should be compared with xspliner and \code{model}.
#' @param n_plots Threshold for number of plots when plotting all variables.
#' @param sort_by When comparing models, determines the model according to which observations should be ordered.
#' @param use_coeff If TRUE, both the PDP function and its approximation are scaled by the corresponding surrogate model coefficient.
#' @param prediction_funs Prediction functions that should be used in model comparison.
#' @param ... Other arguments passed to the model-specific method.
#'
#' @export
plot.xspliner <- function(x, variable_names = NULL, model = NULL, plot_response = TRUE, plot_approx = TRUE,
data = NULL, plot_data = FALSE, plot_deriv = FALSE, n_plots = 6, sort_by = NULL,
data = NULL, plot_data = FALSE, plot_deriv = FALSE, n_plots = 6, sort_by = NULL, use_coeff = TRUE,
compare_with = list(), prediction_funs = list(function(object, newdata) predict(object, newdata)),
...) {
if (is.null(model)) {
plot_variable_transition(x, variable_names, plot_response, plot_approx, data, plot_data, plot_deriv, n_plots)
plot_variable_transition(x, variable_names, plot_response, plot_approx, data, plot_data, plot_deriv, n_plots, use_coeff)
} else {
if (is.null(data)) {
stop("Data must be provided.")
Expand Down
16 changes: 11 additions & 5 deletions R/utils-model.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ specials <- function(model, type = "all") {
}
}

plot_quantitative <- function(x, variable_name, plot_response, plot_approx, data, plot_data, plot_deriv) {
plot_quantitative <- function(x, variable_name, plot_response, plot_approx, data, plot_data, plot_deriv, use_coeff) {
if (plot_data && is.null(data)) {
message("You can plot data points only when data parameter is provided.")
plot_data <- FALSE
Expand All @@ -21,6 +21,10 @@ plot_quantitative <- function(x, variable_name, plot_response, plot_approx, data
stop("You must specify at least one plot.")
}
transition_fun <- transition(x, variable_name, "function")
variable_coeff <- 1
if (use_coeff) {
variable_coeff <- setNames(x$coefficients[-1], all.vars(x$call$formula)[-1])[variable_name]
}

if (plot_data) {
plot_range <- range(data[[variable_name]])
Expand All @@ -40,14 +44,15 @@ plot_quantitative <- function(x, variable_name, plot_response, plot_approx, data
}
if (plot_response) {
data <- transition(x, variable_name, "data")
data$yhat <- variable_coeff * data$yhat
colnames(data)[colnames(data) == "yhat"] <- response_var
names(color_values)[2] <- attr(data, "type")
data$type <- attr(data, "type")
base_data <- rbind(base_data, data)
}
if (plot_approx) {
x_var <- seq(from = plot_range[1], to = plot_range[2], length.out = 50)
y_var <- transition_fun(x_var)
y_var <- variable_coeff * transition_fun(x_var)
data <- data.frame(y_var, x_var)
colnames(data) <- c(response_var, variable_name)
data$type <- "approximation"
Expand All @@ -56,7 +61,7 @@ plot_quantitative <- function(x, variable_name, plot_response, plot_approx, data
if (plot_deriv) {
eps <- (plot_range[2] - plot_range[1]) / 500
x_var <- seq(from = plot_range[1], to = plot_range[2], length.out = 50)[-50]
y_var <- (transition_fun(x_var + eps) - transition_fun(x_var)) / eps
y_var <- variable_coeff * (transition_fun(x_var + eps) - transition_fun(x_var)) / eps
data <- data.frame(y_var, x_var)
colnames(data) <- c(response_var, variable_name)
if (sum(to_plot) == 1) {
Expand Down Expand Up @@ -116,6 +121,7 @@ utils::globalVariables(c("Observation", "Model", "Value"))
#' @param plot_data If TRUE raw data is drawn.
#' @param plot_deriv If TRUE, the derivative of the approximation is shown on the plot.
#' @param n_plots Threshold for number of plots when plotting all variables.
#' @param use_coeff If TRUE, both the PDP function and its approximation are scaled by the corresponding surrogate model coefficient.
#'
#' @examples
#' library(randomForest)
Expand All @@ -135,7 +141,7 @@ utils::globalVariables(c("Observation", "Model", "Value"))
#'
#' @export
plot_variable_transition <- function(x, variable_names = NULL, plot_response = TRUE, plot_approx = TRUE,
data = NULL, plot_data = FALSE, plot_deriv = FALSE, n_plots = 6) {
data = NULL, plot_data = FALSE, plot_deriv = FALSE, n_plots = 6, use_coeff = TRUE) {
if (is.null(variable_names)) {
special_vars <- specials(x, "all")
special_vars_to_plot <- special_vars[1:min(n_plots, length(special_vars))]
Expand All @@ -152,7 +158,7 @@ plot_variable_transition <- function(x, variable_names = NULL, plot_response = T
} else if (variable_names %in% specials(x, "qualitative")) {
plot(transition(x, variable_names, "base"))
} else {
plot_quantitative(x, variable_names, plot_response, plot_approx, data, plot_data, plot_deriv)
plot_quantitative(x, variable_names, plot_response, plot_approx, data, plot_data, plot_deriv, use_coeff)
}
}

Expand Down
Loading