diff --git a/R/utils-model.R b/R/utils-model.R index 2792cfc..aa8d31e 100644 --- a/R/utils-model.R +++ b/R/utils-model.R @@ -103,13 +103,17 @@ utils::globalVariables(c("Observation", "Model", "Value")) plot_model_comparison <- function(x, model, data, compare_with, prediction_functions, sort_by = NULL) { model_name <- rev(as.character(model$call[[1]]))[1] - compare_with$xspliner <- x - compare_with[[model_name]] <- model + models_list <- list(xspliner = x) + models_list[[model_name]] <- model + models_list <- append(models_list, compare_with) if (length(prediction_functions) == 1) { - fitted <- compare_with %>% + fitted <- models_list %>% purrr::map(~ prediction_functions[[1]](., data)) } else { - fitted <- compare_with %>% + if (length(models_list) != length(prediction_functions)) { + stop("prediction_functions should provide prediction functions for all models (surrogate, original and model to compare), or common one.") + } + fitted <- models_list %>% purrr::map2(prediction_functions, function(model, pred_fun) pred_fun(model, data)) } diff --git a/docs/articles/automation.html b/docs/articles/automation.html index cee25a3..dac9949 100644 --- a/docs/articles/automation.html +++ b/docs/articles/automation.html @@ -88,7 +88,7 @@

Automate your work

Krystian Igras

-

2019-06-20

+

2019-08-31

@@ -123,23 +123,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67320 -0.07027 -0.02497 0.10183 0.45924 +## -0.66812 -0.07307 -0.02386 0.10341 0.45717 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.42228 0.22783 -6.243 4.49e-09 *** -## Sepal.Length 0.03024 0.03545 0.853 0.3950 -## xs(Petal.Length) 1.91896 0.36103 5.315 3.94e-07 *** -## xf(Species)versicolor 0.11858 0.16157 0.734 0.4642 -## xf(Species)virginica 0.42614 0.21904 1.945 0.0537 . +## (Intercept) -1.32737 0.21623 -6.139 7.56e-09 *** +## Sepal.Length 0.03222 0.03547 0.908 0.365 +## xs(Petal.Length) 1.85685 0.35456 5.237 5.63e-07 *** +## xf(Species)versicolor 0.08438 0.17014 0.496 0.621 +## xf(Species)virginica 0.39232 0.22838 1.718 0.088 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03081421) +## (Dispersion parameter for gaussian family taken to be 0.03096157) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4681 on 145 degrees of freedom -## AIC: -89.371 +## Residual deviance: 4.4894 on 145 degrees of freedom +## AIC: -88.655 ## ## Number of Fisher Scoring iterations: 2

When the black box model is based on a large number of variables, it can be problematic to specify local parameters for each predictor. The formula also becomes large and hard to read.

@@ -165,23 +165,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67320 -0.07027 -0.02497 0.10183 0.45924 +## -0.66812 -0.07307 -0.02386 0.10341 0.45717 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.42228 0.22783 -6.243 4.49e-09 *** -## Sepal.Length 0.03024 0.03545 0.853 0.3950 -## xs(Petal.Length) 1.91896 0.36103 5.315 3.94e-07 *** -## xf(Species)versicolor 0.11858 0.16157 0.734 0.4642 -## xf(Species)virginica 0.42614 0.21904 1.945 0.0537 . +## (Intercept) -1.32737 0.21623 -6.139 7.56e-09 *** +## Sepal.Length 0.03222 0.03547 0.908 0.365 +## xs(Petal.Length) 1.85685 0.35456 5.237 5.63e-07 *** +## xf(Species)versicolor 0.08438 0.17014 0.496 0.621 +## xf(Species)virginica 0.39232 0.22838 1.718 0.088 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03081421) +## (Dispersion parameter for gaussian family taken to be 0.03096157) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4681 on 145 degrees of freedom -## AIC: -89.371 +## Residual deviance: 4.4894 on 145 degrees of freedom +## AIC: -88.655 ## ## Number of Fisher Scoring iterations: 2

But still you can specify local parameters that override the global ones.

@@ -201,23 +201,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67745 -0.07577 -0.03086 0.09603 0.46022 +## -0.67121 -0.07495 -0.03046 0.09856 0.45944 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.5234 0.2594 -5.874 2.80e-08 *** -## xs(Sepal.Length) 0.2598 0.3088 0.841 0.4016 -## xs(Petal.Length) 1.8853 0.3888 4.849 3.16e-06 *** -## xf(Species)versicolor 0.1269 0.1669 0.761 0.4481 -## xf(Species)virginica 0.4404 0.2274 1.936 0.0548 . +## (Intercept) -1.46014 0.26089 -5.597 1.06e-07 *** +## xs(Sepal.Length) 0.31132 0.31424 0.991 0.3235 +## xs(Petal.Length) 1.80229 0.37818 4.766 4.53e-06 *** +## xf(Species)versicolor 0.09996 0.17451 0.573 0.5677 +## xf(Species)virginica 0.41648 0.23573 1.767 0.0794 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03081848) +## (Dispersion parameter for gaussian family taken to be 0.03092842) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4687 on 145 degrees of freedom -## AIC: -89.35 +## Residual deviance: 4.4846 on 145 degrees of freedom +## AIC: -88.816 ## ## Number of Fisher Scoring iterations: 2

In this case the last_evaluation variable will be transformed with a thin plate regression spline (bs = "tp" is the default for mgcv::s) with basis dimension equal to 10. At the same time, average_monthly_hours will be transformed with cubic splines.

@@ -279,23 +279,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67928 -0.07680 -0.03180 0.09561 0.46812 +## -0.67513 -0.07534 -0.03094 0.09496 0.46835 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.5206 0.2617 -5.810 3.82e-08 *** -## xs(Sepal.Length) 0.2880 0.3120 0.923 0.3574 -## xs(Petal.Length) 1.8418 0.3845 4.791 4.07e-06 *** -## xf(Species)versicolor 0.1458 0.1645 0.886 0.3769 -## xf(Species)virginica 0.4660 0.2245 2.075 0.0397 * +## (Intercept) -1.4577 0.2641 -5.520 1.52e-07 *** +## xs(Sepal.Length) 0.3332 0.3179 1.048 0.2963 +## xs(Petal.Length) 1.7661 0.3734 4.730 5.27e-06 *** +## xf(Species)versicolor 0.1181 0.1716 0.688 0.4926 +## xf(Species)virginica 0.4411 0.2321 1.901 0.0593 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03089793) +## (Dispersion parameter for gaussian family taken to be 0.03099558) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4802 on 145 degrees of freedom -## AIC: -88.964 +## Residual deviance: 4.4944 on 145 degrees of freedom +## AIC: -88.49 ## ## Number of Fisher Scoring iterations: 2

Then each predictor is transformed with the xs and xf symbols, using the default parameters, or the global ones when specified.

@@ -314,23 +314,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67928 -0.07680 -0.03180 0.09561 0.46812 +## -0.67513 -0.07534 -0.03094 0.09496 0.46835 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.5206 0.2617 -5.810 3.82e-08 *** -## xs(Sepal.Length) 0.2880 0.3120 0.923 0.3574 -## xs(Petal.Length) 1.8418 0.3845 4.791 4.07e-06 *** -## xf(Species)versicolor 0.1458 0.1645 0.886 0.3769 -## xf(Species)virginica 0.4660 0.2245 2.075 0.0397 * +## (Intercept) -1.4577 0.2641 -5.520 1.52e-07 *** +## xs(Sepal.Length) 0.3332 0.3179 1.048 0.2963 +## xs(Petal.Length) 1.7661 0.3734 4.730 5.27e-06 *** +## xf(Species)versicolor 0.1181 0.1716 0.688 0.4926 +## xf(Species)virginica 0.4411 0.2321 1.901 0.0593 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03089793) +## (Dispersion parameter for gaussian family taken to be 0.03099558) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4802 on 145 degrees of freedom -## AIC: -88.964 +## Residual deviance: 4.4944 on 145 degrees of freedom +## AIC: -88.49 ## ## Number of Fisher Scoring iterations: 2 @@ -354,23 +354,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67928 -0.07680 -0.03180 0.09561 0.46812 +## -0.67513 -0.07534 -0.03094 0.09496 0.46835 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.5206 0.2617 -5.810 3.82e-08 *** -## xs(Sepal.Length) 0.2880 0.3120 0.923 0.3574 -## xs(Petal.Length) 1.8418 0.3845 4.791 4.07e-06 *** -## Speciesversicolor 0.1458 0.1645 0.886 0.3769 -## Speciesvirginica 0.4660 0.2245 2.075 0.0397 * +## (Intercept) -1.4577 0.2641 -5.520 1.52e-07 *** +## xs(Sepal.Length) 0.3332 0.3179 1.048 0.2963 +## xs(Petal.Length) 1.7661 0.3734 4.730 5.27e-06 *** +## Speciesversicolor 0.1181 0.1716 0.688 0.4926 +## Speciesvirginica 0.4411 0.2321 1.901 0.0593 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03089793) +## (Dispersion parameter for gaussian family taken to be 0.03099558) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4802 on 145 degrees of freedom -## AIC: -88.964 +## Residual deviance: 4.4944 on 145 degrees of freedom +## AIC: -88.49 ## ## Number of Fisher Scoring iterations: 2

For transformation of factors only:

@@ -422,23 +422,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67928 -0.07680 -0.03180 0.09561 0.46812 +## -0.67513 -0.07534 -0.03094 0.09496 0.46835 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.5206 0.2617 -5.810 3.82e-08 *** -## xs(Sepal.Length) 0.2880 0.3120 0.923 0.3574 -## xs(Petal.Length) 1.8418 0.3845 4.791 4.07e-06 *** -## xf(Species)versicolor 0.1458 0.1645 0.886 0.3769 -## xf(Species)virginica 0.4660 0.2245 2.075 0.0397 * +## (Intercept) -1.4577 0.2641 -5.520 1.52e-07 *** +## xs(Sepal.Length) 0.3332 0.3179 1.048 0.2963 +## xs(Petal.Length) 1.7661 0.3734 4.730 5.27e-06 *** +## xf(Species)versicolor 0.1181 0.1716 0.688 0.4926 +## xf(Species)virginica 0.4411 0.2321 1.901 0.0593 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03089793) +## (Dispersion parameter for gaussian family taken to be 0.03099558) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4802 on 145 degrees of freedom -## AIC: -88.964 +## Residual deviance: 4.4944 on 145 degrees of freedom +## AIC: -88.49 ## ## Number of Fisher Scoring iterations: 2

Good practice here is to provide the data parameter as well to detect predictors' classes, and the model type (classification or regression).

@@ -462,23 +462,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67928 -0.07680 -0.03180 0.09561 0.46812 +## -0.67513 -0.07534 -0.03094 0.09496 0.46835 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.5206 0.2617 -5.810 3.82e-08 *** -## xs(Sepal.Length) 0.2880 0.3120 0.923 0.3574 -## xs(Petal.Length) 1.8418 0.3845 4.791 4.07e-06 *** -## xf(Species)versicolor 0.1458 0.1645 0.886 0.3769 -## xf(Species)virginica 0.4660 0.2245 2.075 0.0397 * +## (Intercept) -1.4577 0.2641 -5.520 1.52e-07 *** +## xs(Sepal.Length) 0.3332 0.3179 1.048 0.2963 +## xs(Petal.Length) 1.7661 0.3734 4.730 5.27e-06 *** +## xf(Species)versicolor 0.1181 0.1716 0.688 0.4926 +## xf(Species)virginica 0.4411 0.2321 1.901 0.0593 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03089793) +## (Dispersion parameter for gaussian family taken to be 0.03099558) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4802 on 145 degrees of freedom -## AIC: -88.964 +## Residual deviance: 4.4944 on 145 degrees of freedom +## AIC: -88.49 ## ## Number of Fisher Scoring iterations: 2

In the above examples each predictor is transformed by default. You can exclude selected predictors from transformation by specifying the global alter = "never" parameter, or the bare parameter.

@@ -499,23 +499,23 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.67928 -0.07680 -0.03180 0.09561 0.46812 +## -0.67513 -0.07534 -0.03094 0.09496 0.46835 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -1.5206 0.2617 -5.810 3.82e-08 *** -## xs(Sepal.Length) 0.2880 0.3120 0.923 0.3574 -## xs(Petal.Length) 1.8418 0.3845 4.791 4.07e-06 *** -## xf(Species)versicolor 0.1458 0.1645 0.886 0.3769 -## xf(Species)virginica 0.4660 0.2245 2.075 0.0397 * +## (Intercept) -1.4577 0.2641 -5.520 1.52e-07 *** +## xs(Sepal.Length) 0.3332 0.3179 1.048 0.2963 +## xs(Petal.Length) 1.7661 0.3734 4.730 5.27e-06 *** +## xf(Species)versicolor 0.1181 0.1716 0.688 0.4926 +## xf(Species)virginica 0.4411 0.2321 1.901 0.0593 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 0.03089793) +## (Dispersion parameter for gaussian family taken to be 0.03099558) ## ## Null deviance: 86.5699 on 149 degrees of freedom -## Residual deviance: 4.4802 on 145 degrees of freedom -## AIC: -88.964 +## Residual deviance: 4.4944 on 145 degrees of freedom +## AIC: -88.49 ## ## Number of Fisher Scoring iterations: 2

Works! Can it be simpler? Actually not, because of the black-box-based transformation and theory, but we can provide some model-based parameters upfront using DALEX’s explainer object (see the next section).

@@ -555,14 +555,14 @@

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -0.8193 -0.3415 -0.3391 -0.3367 2.4276 +## -0.8194 -0.3415 -0.3391 -0.3367 2.4276 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) -## (Intercept) 25.1067673 31.6322221 0.794 0.42737 -## xs(cmedv) 24.8875891 9.6265474 2.585 0.00973 ** -## rad -0.0008441 0.0214852 -0.039 0.96866 -## xs(lstat) -3.6624715 30.0819347 -0.122 0.90310 +## (Intercept) 1.027e+02 1.203e+02 0.853 0.39357 +## xs(cmedv) 9.507e+01 3.677e+01 2.585 0.00972 ** +## rad -8.456e-04 2.149e-02 -0.039 0.96861 +## xs(lstat) -1.402e+01 1.149e+02 -0.122 0.90292 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## @@ -585,11 +585,19 @@

Just check the example below:

library(DALEX)
 rf_boston <- randomForest(lstat ~ cmedv + crim + chas, data = boston)
-explainer <- explain(rf_boston, label = "boston")
-model <- xspline(
-  explainer
-)
-summary(model)
+explainer <- explain(rf_boston, label = "boston") +
## Preparation of a new explainer is initiated
+##   -> model label       :  boston 
##   -> data              :  506  rows  4  cols (extracted from the model)
##   -> target variable   :  not specified! (WARNING)
##   -> predict function  :  yhat.randomForest  will be used (default)
+##   -> predicted values  :  numerical, min =  5.975977 , mean =  12.65206 , max =  24.93637  
##   -> residual function :  difference between y and yhat (default)
## A new explainer has been created!
+
model <- xspline(
+  explainer
+)
+summary(model)
## 
 ## Call:
 ## stats::glm(formula = lstat ~ xs(cmedv) + xs(crim) + xf(chas), 
@@ -597,22 +605,22 @@ 

## ## Deviance Residuals: ## Min 1Q Median 3Q Max -## -11.2685 -2.3881 -0.6768 1.6969 21.1304 +## -11.2276 -2.3748 -0.6923 1.6867 21.1417 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) -## (Intercept) -15.25719 1.58949 -9.599 < 2e-16 *** -## xs(cmedv) 1.60307 0.06814 23.524 < 2e-16 *** -## xs(crim) 0.58866 0.15639 3.764 0.000187 *** -## xf(chas)1 1.44242 0.69403 2.078 0.038186 * +## (Intercept) -15.95068 1.44893 -11.009 < 2e-16 *** +## xs(cmedv) 1.70141 0.07246 23.480 < 2e-16 *** +## xs(crim) 0.54515 0.14498 3.760 0.00019 *** +## xf(chas)1 1.42731 0.69468 2.055 0.04043 * ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## -## (Dispersion parameter for gaussian family taken to be 15.3901) +## (Dispersion parameter for gaussian family taken to be 15.41681) ## ## Null deviance: 25752.4 on 505 degrees of freedom -## Residual deviance: 7725.8 on 502 degrees of freedom -## AIC: 2825.2 +## Residual deviance: 7739.2 on 502 degrees of freedom +## AIC: 2826.1 ## ## Number of Fisher Scoring iterations: 2

You can provide your own xspline parameters, which overwrite those sourced from the explainer.

@@ -620,6 +628,7 @@

diff --git a/docs/reference/plot.xspliner.html b/docs/reference/plot.xspliner.html index bdd4b8b..0fd7dd5 100644 --- a/docs/reference/plot.xspliner.html +++ b/docs/reference/plot.xspliner.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,9 +113,7 @@

Plot method for 'xspliner' model

-

Plot method for 'xspliner' model

-
# S3 method for xspliner
 plot(x, variable_names = NULL, model = NULL,
@@ -123,7 +122,7 @@ 

Plot method for 'xspliner' model

sort_by = NULL, compare_with = list(), prediction_funs = list(function(object, newdata) predict(object, newdata)), ...)
- +

Arguments

@@ -180,14 +179,14 @@

Ar

Other arguments passed to the model-specific method.

- + diff --git a/docs/reference/predict.xspliner.html b/docs/reference/predict.xspliner.html index 5dd50f4..c063403 100644 --- a/docs/reference/predict.xspliner.html +++ b/docs/reference/predict.xspliner.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,13 +113,11 @@

Predict xspliner method

-

Predict xspliner method

-
# S3 method for xspliner
 predict(object, newdata, ...)
- +

Arguments

@@ -135,14 +134,14 @@

Ar

Other arguments passed to the predict.glm method.

- + diff --git a/docs/reference/print.xspliner.html b/docs/reference/print.xspliner.html index eb92dd7..b1f5608 100644 --- a/docs/reference/print.xspliner.html +++ b/docs/reference/print.xspliner.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,13 +113,11 @@

Print method for xspliner object

-

Print method for xspliner object

-
# S3 method for xspliner
 print(x, predictor, ...)
- +

Arguments

@@ -135,14 +134,14 @@

Ar

Other arguments passed to the model-specific print method.

- + diff --git a/docs/reference/stats.html b/docs/reference/stats.html index 7dbb54c..ec6a401 100644 --- a/docs/reference/stats.html +++ b/docs/reference/stats.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,16 +113,14 @@

Statistics used for better linear model selection

-

Used as the compare_stat parameter in the xspline method. Each function has the attribute "higher-better". If "higher-better" is TRUE, then the model with the higher statistic value is treated as the better one.

-
aic(glm_model)
 
 hoslem(glm_model)
- +

Arguments

@@ -130,14 +129,14 @@

Ar

Linear model - glm function output.

- + diff --git a/docs/reference/summary.xspliner.html b/docs/reference/summary.xspliner.html index 11bef96..47b213e 100644 --- a/docs/reference/summary.xspliner.html +++ b/docs/reference/summary.xspliner.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,15 +113,13 @@

Summary method for xspliner object

-

Summary method for xspliner object

-
# S3 method for xspliner
 summary(object, predictor, ..., model = NULL,
   newdata = NULL, prediction_funs = list(function(object, newdata)
   predict(object, newdata)), env = parent.frame())
- +

Arguments

@@ -154,7 +153,7 @@

Ar

Environment in which newdata is stored (if not provided as parameter).

- +

Details

The summary output depends strictly on data provided to it.

@@ -185,14 +184,14 @@

Details
  • 1 - Maximum ROC difference $$1 - \max_{t \in T} ||ROC_{o}(t) - ROC_{s}(t)||_{2}$$ Calculates the maximum of the Euclidean distances between ROC points for the specified threshold set T. In this implementation T is the union of the breakpoints of each ROC curve.

  • 1 - Mean ROC difference The above version, using the mean instead of the max measure.

  • -

    Examples

    library(randomForest)
    #> randomForest 4.6-14
    #> Type rfNews() to see new features/changes/bug fixes.
    set.seed(1) data <- iris # regression model iris.rf <- randomForest(Petal.Width ~ Sepal.Length + Petal.Length + Species, data = data) -iris.xs <- xspline(iris.rf)
    #> Cannot extract model family. Use gaussian.
    #> Cannot extract model link. Use identity.
    # Summary of quantitative variable transition +iris.xs <- xspline(iris.rf) +# Summary of quantitative variable transition summary(iris.xs, "Sepal.Length")
    #> #> Family: gaussian #> Link function: identity @@ -234,8 +233,7 @@

    Examp prob_xs <- function(object, newdata) predict(object, newdata = newdata, type = "response") summary(iris.xs, model = iris.rf, newdata = data, prediction_funs = list(prob_xs, prob_rf))

    #> Models comparison #> 1 - Max prediction normed-diff: 0.3908407 -#> R^2: 0.958412 -#> 1 - Max ROC diff: 0.56 +#> R^2: 0.958412
    #> Setting levels: control = versicolor, case = virginica
    #> Setting levels: control = versicolor, case = virginica
    #> Warning: An upcoming version of pROC will set the 'transpose' argument to FALSE by default. Set transpose = TRUE explicitly to keep the current behavior, or transpose = FALSE to adopt the new one and silence this warning. Type help(coords_transpose) for additional information.
    #> Warning: An upcoming version of pROC will set the 'transpose' argument to FALSE by default. Set transpose = TRUE explicitly to keep the current behavior, or transpose = FALSE to adopt the new one and silence this warning. Type help(coords_transpose) for additional information.
    #> 1 - Max ROC diff: 0.56 #> 1 - Mean ROC diff: 0.8443484
    # Prediction as final category response_rf <- function(object, newdata) predict(object, newdata = newdata) response_xs <- function(object, newdata) { @@ -302,9 +300,7 @@

    Examp

    Contents

    diff --git a/docs/reference/transition.html b/docs/reference/transition.html index 4f5ca82..4d3f442 100644 --- a/docs/reference/transition.html +++ b/docs/reference/transition.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,15 +113,13 @@

    Extract variable transformation from xspliner

    -

    Extract variable transformation from xspliner

    -
    transition(model, ...)
     
     # S3 method for xspliner
     transition(model, predictor, type = "function", ...)
    - +

    Arguments

    @@ -143,14 +142,14 @@

    Ar factorMerger.

    - + diff --git a/docs/reference/xf_opts_default.html b/docs/reference/xf_opts_default.html index 4bfbf1a..805792f 100644 --- a/docs/reference/xf_opts_default.html +++ b/docs/reference/xf_opts_default.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,27 +113,24 @@

    Default parameters for transition methods

    -

    While constructing a formula interpreted by the xspliner package, some parameters may be specified within the xs(..) or xf(..) symbols. Below are the default parameters. See details in vignette("xspliner").

    -
    xf_opts_default
     
     xs_opts_default
    - + +

    Format

    An object of class list of length 2.

    - diff --git a/docs/reference/xspline-1.png b/docs/reference/xspline-1.png index d704432..a27656d 100644 Binary files a/docs/reference/xspline-1.png and b/docs/reference/xspline-1.png differ diff --git a/docs/reference/xspline-2.png b/docs/reference/xspline-2.png index d704432..a27656d 100644 Binary files a/docs/reference/xspline-2.png and b/docs/reference/xspline-2.png differ diff --git a/docs/reference/xspline-3.png b/docs/reference/xspline-3.png index d704432..a27656d 100644 Binary files a/docs/reference/xspline-3.png and b/docs/reference/xspline-3.png differ diff --git a/docs/reference/xspline.html b/docs/reference/xspline.html index f10147f..760deae 100644 --- a/docs/reference/xspline.html +++ b/docs/reference/xspline.html @@ -13,6 +13,7 @@ + @@ -23,6 +24,7 @@ + @@ -95,7 +97,6 @@ Changelog - @@ -112,11 +113,9 @@

    Builds predictive model based GLM.

    -

    The method provides the main functionality for building GLM models with automatic variable transformation. The transformations are based on the specified single-variable responses of the selected black-box model. See details in vignette("xspliner").

    -
    xspline(object, ...)
     
    @@ -133,7 +132,7 @@ 

    Builds predictive model based GLM.

    # S3 method for explainer xspline(object, env = parent.frame(), ...)
    - +

    Arguments

    @@ -184,15 +183,13 @@

    Ar call are considered in transition.

    - +

    Value

    GLM object of class 'xspliner'.

    -

    Details

    model_surrogate_xspliner is a wrapper of the xspline method that ensures consistency with the https://github.com/ModelOriented/DrWhy tools.

    -

    Examples

    # preparing blackbox model @@ -204,7 +201,8 @@

    Examp # formula based xspliner xs_iris <- xspline( Petal.Width ~ xs(Sepal.Length) + xs(Petal.Length) + xf(Species), - model = rf_iris)

    #> Cannot extract model family. Use gaussian.
    #> Cannot extract model link. Use identity.
    summary(xs_iris)
    #> + model = rf_iris) +summary(xs_iris)
    #> #> Call: #> stats::glm(formula = Petal.Width ~ xs(Sepal.Length) + xs(Petal.Length) + #> xf(Species), family = family, data = data) @@ -232,7 +230,8 @@

    Examp #> Number of Fisher Scoring iterations: 2 #>

    plot(xs_iris, "Sepal.Length")
    # passing just the model -xs_iris <- xspline(rf_iris)
    #> Cannot extract model family. Use gaussian.
    #> Cannot extract model link. Use identity.
    summary(xs_iris)
    #> +xs_iris <- xspline(rf_iris) +summary(xs_iris)
    #> #> Call: #> stats::glm(formula = Petal.Width ~ xs(Sepal.Length) + xs(Petal.Length) + #> xf(Species), family = family, data = data) @@ -260,8 +259,20 @@

    Examp #> Number of Fisher Scoring iterations: 2 #>

    plot(xs_iris, "Sepal.Length")
    # using DALEX -library(DALEX)
    #> Welcome to DALEX (version: 0.2.4).
    xs_iris_explainer <- explain(rf_iris) -xs_iris <- xspline(rf_iris)
    #> Cannot extract model family. Use gaussian.
    #> Cannot extract model link. Use identity.
    summary(xs_iris)
    #> +library(DALEX)
    #> Registered S3 method overwritten by 'DALEX': +#> method from +#> print.description questionr
    #> Welcome to DALEX (version: 0.4.7). +#> Find examples and detailed introduction at: https://pbiecek.github.io/PM_VEE/ +#> Additional features will be available after installation of: ingredients, iBreakDown. +#> Use 'install_dependencies()' to get all suggested dependencies
    xs_iris_explainer <- explain(rf_iris)
    #> Preparation of a new explainer is initiated +#> -> model label : randomForest (default) +#> -> data : 150 rows 4 cols (extracted from the model) +#> -> target variable : not specified! (WARNING) +#> -> predict function : yhat.randomForest will be used (default) +#> -> predicted values : numerical, min = 0.1991898 , mean = 1.199876 , max = 2.134114 +#> -> residual function : difference between y and yhat (default) +#> A new explainer has been created!
    xs_iris <- xspline(rf_iris) +summary(xs_iris)
    #> #> Call: #> stats::glm(formula = Petal.Width ~ xs(Sepal.Length) + xs(Petal.Length) + #> xf(Species), family = family, data = data) @@ -288,17 +299,14 @@

    Examp #> #> Number of Fisher Scoring iterations: 2 #>

    plot(xs_iris, "Sepal.Length")
    -
    +
    -

    Easy way for approximating data with splines.

    - - + +