diff --git a/vignettes/oem_vignette.html b/vignettes/oem_vignette.html index f4cf4ec..aa67046 100644 --- a/vignettes/oem_vignette.html +++ b/vignettes/oem_vignette.html @@ -356,7 +356,7 @@

3 Quick Start

fit1 <- oem(x = x, y = y, penalty = "lasso")

Plot the solution path

plot(fit1)
-

+

4 Key Features

@@ -375,30 +375,50 @@

4.1 Available functions

Main fitting function -predict.oemfit() -Prediction for oem objects +oem.xtx() +Fitting function for precomputed \(X'X,X'y\) -plot.oemfit() -Plotting for oem objects +big.oem() +Fitting function for big.matrix() objects -logLik.oemfit() -log Likelihood for oem objects +summary.oemfit() +Summary for oem objects + + +predict.oemfit() +Prediction for oem objects + + +plot.oemfit() +Plotting for oem objects +logLik.oemfit() +log Likelihood for oem objects + + cv.oem() Cross-validation function + +xval.oem() +Fast cross-validation for linear models + -predict.cv.oem() -Prediction for cv.oem objects +summary.cv.oem() +Summary for cv.oem objects -plot.cv.oem() -Plotting for cv.oem objects +predict.cv.oem() +Prediction for cv.oem objects +plot.cv.oem() +Plotting for cv.oem objects + + logLik.cv.oem() log Likelihood for cv.oem objects @@ -525,7 +545,7 @@

5 Fitting multiple penalties at o plot(fit2, which.model = 2, xvar = "lambda") plot(fit2, which.model = 3, xvar = "lambda") plot(fit2, which.model = "grp.mcp", xvar = "lambda")

-

+

5.1 Timing Comparison

The following is a demonstration of oem’s efficiency for computing solutions for tuning parameter paths for multiple penalties at once.

@@ -540,7 +560,7 @@

5.1.1 Linear Regression

system.time(fit2a <- oem(x = x2, y = y2, penalty = c("grp.lasso"), groups = rep(1:20, each = 5), nlambda = 100L))
##    user  system elapsed 
-##    0.21    0.03    0.24
+## 0.23 0.00 0.23
system.time(fit2b <- oem(x = x2, y = y2, 
                          penalty = c("grp.lasso", "lasso", "mcp", 
                                      "scad", "elastic.net", "grp.mcp",
@@ -554,7 +574,7 @@ 

5.1.1 Linear Regression

"grp.scad", "sparse.grp.lasso"), groups = rep(1:20, each = 5), nlambda = 500L))
##    user  system elapsed 
-##    0.36    0.04    0.39
+## 0.34 0.01 0.39

5.1.2 Logistic Regression

@@ -570,12 +590,12 @@

5.1.2 Logistic Regression

family = "binomial", groups = rep(1:20, each = 5), nlambda = 100L))
##    user  system elapsed 
-##    2.07    0.02    2.13
+## 2.59 0.02 2.64
system.time(fit2b <- oem(x = x2, y = y2, penalty = c("grp.lasso", "lasso", "mcp", "scad", "elastic.net"),
                          family = "binomial",
                          groups = rep(1:20, each = 5), nlambda = 100L))
##    user  system elapsed 
-##   10.26    0.03   10.57
+## 10.53 0.14 10.83 @@ -589,14 +609,14 @@

6 Cross Validation

groups = rep(1:20, each = 5), nfolds = 10))
##    user  system elapsed 
-##    1.29    0.14    1.45
+## 1.53 0.14 1.67

Plot the cross validation mean squared error results for each model

layout(matrix(1:4, ncol = 2))
 plot(cvfit1, which.model = 1)
 plot(cvfit1, which.model = 2)
 plot(cvfit1, which.model = 3)
 plot(cvfit1, which.model = 4)
-

+

6.1 Extremely Fast Cross Validation for Linear Models

The function xval.oem offers accelerated cross validation for penalized linear models. In many cases it is orders of magnitude faster than cv.oem. It is only recommended for scenarios where the number of observations is larger than the number of variables. In addition to the computational gains in single-core usage, it also benefits from parallelization using OpenMP (instead of using foreach, as used by cv.oem). For large enough problems, it has on a similar order of computation time as just fitting one OEM model.

@@ -609,21 +629,21 @@

6.1 Extremely Fast Cross Validati groups = rep(1:20, each = 5), nfolds = 10))

##    user  system elapsed 
-##    5.36    0.61    6.08
+## 5.30 0.89 6.27
system.time(xvalfit1 <- xval.oem(x = xc, y = yc, penalty = "lasso",
                                  groups = rep(1:20, each = 5), 
                                  nfolds = 10))
##    user  system elapsed 
-##    0.97    0.01    1.08
+## 0.86 0.08 0.94
system.time(xvalfit2 <- xval.oem(x = xc, y = yc, penalty = "lasso",
                                  groups = rep(1:20, each = 5), 
                                  nfolds = 10, ncores = 2))
##    user  system elapsed 
-##    1.21    0.02    0.82
+## 1.03 0.03 0.69
system.time(ofit1 <- oem(x = xc, y = yc, penalty = "lasso",
                          groups = rep(1:20, each = 5)))
##    user  system elapsed 
-##    0.17    0.05    0.22
+## 0.20 0.02 0.22

6.2 Evaluation Metrics

@@ -694,10 +714,10 @@

6.2.1 Misclassification Rate

gamma = 2, groups = rep(1:10, each = 2), nfolds = 10, standardize = FALSE)
-

+

In this case, misclassification rate is not the best indicator of performance. The classes here are imbalanced:

mean(y)
-
## [1] 0.065
+
## [1] 0.0705

6.2.2 Area Under the ROC Curve

@@ -709,7 +729,7 @@

6.2.2 Area Under the ROC Curve gamma = 2, groups = rep(1:10, each = 2), nfolds = 10, standardize = FALSE)

-

+

@@ -727,16 +747,16 @@

7.1 OEM with Precomputed standardize = FALSE, intercept = FALSE, groups = rep(1:20, each = 5)))
##    user  system elapsed 
-##    0.19    0.02    0.20
+## 0.19 0.04 0.23
##    user  system elapsed 
-##    0.02    0.00    0.01
+## 0.01 0.00 0.02 -
## [1] 1.332268e-14
+
## [1] 9.992007e-15
-
## [1] 1.354472e-14
+
## [1] 9.992007e-15
col.std <- apply(xc, 2, sd)
 fit.xtx.s <- oem.xtx(xtx = xtx, xty = xty, 
                      scale.factor = col.std,
@@ -793,13 +813,13 @@ 

8.1 Parallelization via OpenMP standardize = FALSE, intercept = FALSE, groups = rep(1:20, each = 25)))

##    user  system elapsed 
-##    3.18    0.09    3.29
+## 5.03 0.27 6.12
##    user  system elapsed 
-##    3.96    0.08    2.27
+## 5.55 0.12 4.48

8.2 Penalty Adjustment

@@ -826,7 +846,7 @@

8.2 Penalty Adjustment

group.weights = 1 / group.norms, groups = group.indicators, lambda = lams)
-

+