From b7f327a86495fd9e7972f280beed6800bb340c35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=98topepo=E2=80=99?= <‘mxkuhn@gmail.com’> Date: Tue, 1 Oct 2024 17:49:23 -0400 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- chapters/categorical-predictors.html | 9 ++++++ chapters/contributing.html | 9 ++++++ chapters/embeddings.html | 11 ++++++- chapters/grid-search.html | 15 ++++++++-- chapters/initial-data-splitting.html | 9 ++++++ chapters/interactions-nonlinear.html | 11 ++++++- chapters/introduction.html | 9 ++++++ chapters/missing-data.html | 9 ++++++ chapters/news.html | 43 ++++++++++++++++++---------- chapters/numeric-predictors.html | 9 ++++++ chapters/overfitting.html | 9 ++++++ chapters/resampling.html | 11 ++++++- chapters/whole-game.html | 9 ++++++ index.html | 13 +++++++-- search.json | 2 +- sitemap.xml | 24 ++++++++-------- 17 files changed, 167 insertions(+), 37 deletions(-) diff --git a/.nojekyll b/.nojekyll index 62c55c7..afd587e 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -d9cfce5b \ No newline at end of file +4d3e327f \ No newline at end of file diff --git a/chapters/categorical-predictors.html b/chapters/categorical-predictors.html index 0eb01ad..ea0d941 100644 --- a/chapters/categorical-predictors.html +++ b/chapters/categorical-predictors.html @@ -88,6 +88,15 @@ "search-label": "Search" } } + + + diff --git a/chapters/contributing.html b/chapters/contributing.html index fa2d17d..6077a7a 100644 --- a/chapters/contributing.html +++ b/chapters/contributing.html @@ -68,6 +68,15 @@ "search-label": "Search" } } + + + diff --git a/chapters/embeddings.html b/chapters/embeddings.html index cf910a9..30e377b 100644 --- a/chapters/embeddings.html +++ b/chapters/embeddings.html @@ -93,6 +93,15 @@ "search-label": "Search" } } + + + @@ -807,7 +816,7 @@

New features are added to the model based on the distance between \(\boldsymbol{x}_0\) and \(\boldsymbol{\bar{x}}^*_{c}\). The amount of shrinkage is best optimized using the tuning methods described in later chapters. There are several variations of this specific procedure. Wang and Zhu (2007) describe several different approaches and Efron (2009) demonstrates the connection to Bayesian methods.

-
+
Nearest centroids diff --git a/chapters/grid-search.html b/chapters/grid-search.html index b64919b..1f5dd39 100644 --- a/chapters/grid-search.html +++ b/chapters/grid-search.html @@ -87,6 +87,15 @@ "search-label": "Search" } } + + + @@ -351,7 +360,7 @@

-
+
\begin{algorithm} \begin{algorithmic} \State $\mathfrak{D}^{tr}$: training set of predictors $X$ and outcome $y$ \State $B$: number of resamples \State $M(\mathfrak{D}^{tr}, B)$: a mapping function to split $\mathfrak{D}^{tr}$ for each of $B$ iterations. \State $f()$: model pipeline \State $\Theta$: Parameter set ($s \times m$) with candidates $\theta_j$ \For{$j=1$ \To $s$} \State Generate $\hat{Q}_{j} =$ \Call{Resample}{$\mathfrak{D}^{tr}, f(\cdot;\theta_j), M(\mathfrak{D}^{tr}, B)$} corresponding to candidate $\theta_j$. \EndFor \State Determine $\hat{\theta}_{opt}$ that optimizes $\hat{Q}_{j}$. \end{algorithmic} \end{algorithm}
@@ -497,7 +506,7 @@

-
+
\begin{algorithm} \begin{algorithmic} \State $\mathfrak{D}^{tr}$: training set of predictors $X$ and outcome $y$ \State $B$: number of resamples \State Initial number of resamples $1 \lt B_{min} \lt B$ executed prior to analysis \State $M(\mathfrak{D}^{tr}, B)$: a mapping function to split $\mathfrak{D}^{tr}$ for each of $B$ iterations. \State $f()$: model pipeline \State $\Theta$: Parameter set ($s \times m$) with candidates $\theta_j$ \For{$j=1$ \To $s$} \For{$b=1$ \To $B_{min}$} \State Generate $\hat{Q}_{jb} =$ \Call{Resample}{$\mathfrak{D}^{tr}, f(\cdot;\theta_j), M_b(\mathfrak{D}^{tr}, B)$} \EndFor \State Compute $\hat{Q}_{j} = 1/B_{min}\sum_b \hat{Q}_{jb}$. \EndFor \State Eliminate candidates to produce $\Theta^b$ ($s_b \times m$) \For{$b = B_{min} + 1$ \To $B$} \For{$j=1$ \To $s$} \State Generate $\hat{Q}_{jb} =$ \Call{Resample}{$\mathfrak{D}^{tr}, f(\cdot;\theta_j), M_b(\mathfrak{D}^{tr}, B)$} \State Update candidate subset $\Theta^b$ by applying the filtering analysis \EndFor \EndFor \State Determine $\hat{\theta}_{opt}$ that optimizes $\hat{Q}_j^k$. \end{algorithmic} \end{algorithm}
@@ -555,7 +564,7 @@

<
-
+
diff --git a/chapters/initial-data-splitting.html b/chapters/initial-data-splitting.html index 674a763..424ec73 100644 --- a/chapters/initial-data-splitting.html +++ b/chapters/initial-data-splitting.html @@ -88,6 +88,15 @@ "search-label": "Search" } } + + + diff --git a/chapters/interactions-nonlinear.html b/chapters/interactions-nonlinear.html index 13ccacd..5087018 100644 --- a/chapters/interactions-nonlinear.html +++ b/chapters/interactions-nonlinear.html @@ -93,6 +93,15 @@ "search-label": "Search" } } + + + @@ -369,7 +378,7 @@

In the predictors’ original form, there is a significant overlap between the two classes of samples. However, when the ratio of the predictors is used, the newly derived predictor better discriminates between classes (shown in Figure 8.1(b)). While not a general rule, the three data characteristics above suggest that the modeler attempts to form ratios from two or more predictors 1.

-
+
Panel (a): two highly correlated, right-skewed predictors with two classes. Panel (b): separation of classes using $log(A/B)$. diff --git a/chapters/introduction.html b/chapters/introduction.html index 4522ac4..c7dc34a 100644 --- a/chapters/introduction.html +++ b/chapters/introduction.html @@ -88,6 +88,15 @@ "search-label": "Search" } } + + + diff --git a/chapters/missing-data.html b/chapters/missing-data.html index 65779f0..2776d52 100644 --- a/chapters/missing-data.html +++ b/chapters/missing-data.html @@ -93,6 +93,15 @@ "search-label": "Search" } } + + + diff --git a/chapters/news.html b/chapters/news.html index 4b37686..b73d9dd 100644 --- a/chapters/news.html +++ b/chapters/news.html @@ -68,6 +68,15 @@ "search-label": "Search" } } + + + @@ -283,40 +292,44 @@

Errata

Changelog

-

2024-09-23

+

2024-10-01

+

Enabled Google Analytics.

+
+
+

2024-09-23

Chapter 4 is now on missing data, bumping subsequent chapter numbers.

New chapters on resampling and grid search are now online.

-
-

2024-06-17

+
+

2024-06-17

Several new chapters on embeddings, splines/interactions/discretization, and overfitting. Also, shinylive is used for interactive visualizations of concepts.

-
-

2024-01-02

+
+

2024-01-02

Fixed various typos.

-
-

2023-12-21

+
+

2023-12-21

Typo fix on main page.

-
-

2023-12-19

+
+

2023-12-19

Small updates.

Updated snapshot.

The background color of premade outputs has been changed to the page's background color (#16).

-
-

2023-12-11

+
+

2023-12-11

No new content.

Updated snapshot.

Includes new Google Scholar links for articles (#8)

-
-

2023-11-17

+
+

2023-11-17

Updated renv snapshot.

-
-

2023-10-09

+
+

2023-10-09

First committed versions.

diff --git a/chapters/numeric-predictors.html b/chapters/numeric-predictors.html index 2424d54..04ab540 100644 --- a/chapters/numeric-predictors.html +++ b/chapters/numeric-predictors.html @@ -88,6 +88,15 @@ "search-label": "Search" } } + + + diff --git a/chapters/overfitting.html b/chapters/overfitting.html index b8189b8..c633e0c 100644 --- a/chapters/overfitting.html +++ b/chapters/overfitting.html @@ -88,6 +88,15 @@ "search-label": "Search" } } + + + diff --git a/chapters/resampling.html b/chapters/resampling.html index 21c74d8..abaf041 100644 --- a/chapters/resampling.html +++ b/chapters/resampling.html @@ -88,6 +88,15 @@ "search-label": "Search" } } + + + @@ -396,7 +405,7 @@

-
+
\begin{algorithm} \begin{algorithmic} \State $\mathfrak{D}^{tr}$: training set of predictors $X$ and outcome $y$ \State $B$: number of resamples \State $M(\mathfrak{D}^{tr}, B)$: a mapping function to split $\mathfrak{D}^{tr}$ for each of $B$ iterations. \State $f()$: model pipeline \Procedure{Resample}{$\mathfrak{D}^{tr}, f, M(\mathfrak{D}^{tr}, B)$} \For{$b =1$ \To $B$} \State Partition $\mathfrak{D}^{tr}$ into $\{\mathfrak{D}_b^{fit}, \mathfrak{D}_b^{pred}\}$ using $M_b(\mathfrak{D}^{tr}, B)$. \State Train model pipeline $f$ on the analysis set to produce $\hat{f}_{b}(\mathfrak{D}_b^{fit})$. \State Generate assessment set predictions $\hat{y}_b$ by applying model $\hat{f}_{b}$ to $\mathfrak{D}_b^{pred}$. \State Estimate performance statistic $\hat{Q}_{b}$. \EndFor \State Compute resampling estimate $\hat{Q} = 1/B\sum_{b=1}^B \hat{Q}_{b}$. \Return $\hat{Q}$. \EndProcedure \end{algorithmic} \end{algorithm}
diff --git a/chapters/whole-game.html b/chapters/whole-game.html index decba30..e29efd9 100644 --- a/chapters/whole-game.html +++ b/chapters/whole-game.html @@ -88,6 +88,15 @@ "search-label": "Search" } } + + + diff --git a/index.html b/index.html index 6eb0dac..fec4e15 100644 --- a/index.html +++ b/index.html @@ -8,7 +8,7 @@ - + Applied Machine Learning for Tabular Data