diff --git a/docs/404.html b/docs/404.html index 2a2546e..e0e69fd 100644 --- a/docs/404.html +++ b/docs/404.html @@ -38,7 +38,7 @@
diff --git a/docs/articles/anomalize_methods.html b/docs/articles/anomalize_methods.html index 8d3d364..3fc9014 100644 --- a/docs/articles/anomalize_methods.html +++ b/docs/articles/anomalize_methods.html @@ -39,7 +39,7 @@ @@ -97,7 +97,7 @@vignettes/anomalize_methods.Rmd
anomalize_methods.Rmd
Load two libraries to perform the comparison.
+library(anomalize) + +# NOTE: timetk now has anomaly detection built in, which +# will get the new functionality going forward. + +anomalize <- anomalize::anomalize +plot_anomalies <- anomalize::plot_anomaliesCollect data on the daily downloads of the lubridate
package. This comes from the data set,
tidyverse_cran_downloads
that is part of
diff --git a/docs/articles/anomalize_quick_start_guide.html b/docs/articles/anomalize_quick_start_guide.html
index 2b0e2e5..73f4ba7 100644
--- a/docs/articles/anomalize_quick_start_guide.html
+++ b/docs/articles/anomalize_quick_start_guide.html
@@ -39,7 +39,7 @@
@@ -97,7 +97,7 @@
vignettes/anomalize_quick_start_guide.Rmd
anomalize_quick_start_guide.Rmd
library(tidyverse)
library(tibbletime)
-library(anomalize)
+library(anomalize)
+
+# NOTE: timetk now has anomaly detection built in, which
+# will get the new functionality going forward.
+
+anomalize <- anomalize::anomalize
+plot_anomalies <- anomalize::plot_anomalies
Get some data. We’ll use the tidyverse_cran_downloads
data set that comes with anomalize
. A few points:
We can use the general workflow for anomaly detection, which involves three main functions:
Next, let’s perform anomaly detection.
lubridate_daily_downloads_anomalized <- lubridate_daily_downloads %>%
diff --git a/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-11-1.png
index 5a43148..d0a1ee7 100644
Binary files a/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-11-1.png and b/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-11-1.png differ
diff --git a/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-9-2.png b/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-9-2.png
index 139c6ed..523f833 100644
Binary files a/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-9-2.png and b/docs/articles/anomalize_quick_start_guide_files/figure-html/unnamed-chunk-9-2.png differ
diff --git a/docs/articles/forecasting_with_cleaned_anomalies.html b/docs/articles/forecasting_with_cleaned_anomalies.html
index a60366d..2991429 100644
--- a/docs/articles/forecasting_with_cleaned_anomalies.html
+++ b/docs/articles/forecasting_with_cleaned_anomalies.html
@@ -39,7 +39,7 @@
vignettes/forecasting_with_cleaned_anomalies.Rmd
forecasting_with_cleaned_anomalies.Rmd
library(tidyverse)
library(tidyquant)
library(anomalize)
-library(timetk)
+library(timetk)
+
+# NOTE: timetk now has anomaly detection built in, which
+# will get the new functionality going forward.
+# Use this script to prevent overwriting legacy anomalize:
+
+anomalize <- anomalize::anomalize
+plot_anomalies <- anomalize::plot_anomalies
Here is a short example with the
Let’s take one package with some extreme events. We can home in on
Dancho M, Vaughan D (2023).
anomalize: Tidy Anomaly Detection.
-R package version 0.2.3, https://github.com/business-science/anomalize.
+R package version 0.3.0, https://github.com/business-science/anomalize.
The The original To prevent the new Tidy anomaly detection You can install the development version with Load the Next, let’s get some data. Suppose we want to determine which daily download “counts” are anomalous. It’s as easy as using the three main functions ( Yes! Anomalize has a new function, Business Science offers two 1-hour courses on Anomaly Detection: Learning Lab 18 - Time Series Anomaly Detection with Learning Lab 17 - Anomaly Detection with Prepare for supersession by Bug Fixestidyverse_cran_downloads
dataset that comes with
anomalize
. We’ll see how we can reduce the forecast
@@ -144,7 +151,7 @@ Example - Reducing Forecasti
#> 8 2017-01-08 1556 tidyr
#> 9 2017-01-09 3678 tidyr
#> 10 2017-01-10 7086 tidyr
-#> # … with 6,365 more rows
+#> # ℹ 6,365 more rows
lubridate
, which has some outliers that we can fix.
@@ -194,13 +201,13 @@
Forecasting Lubridate Downloads model_formula <- as.formula(paste0(quo_name(predict_expr), " ~ index.num + year + quarter + month.lbl + day + wday.lbl"))
model_glm <- train_tbl %>%
- tk_augment_timeseries_signature() %>%
+ tk_augment_timeseries_signature() %>%
glm(model_formula, data = .)
# Make Prediction
suppressWarnings({
# Suppress rank-deficit warning
- prediction <- predict(model_glm, newdata = test_tbl %>% tk_augment_timeseries_signature())
+ prediction <- predict(model_glm, newdata = test_tbl %>% tk_augment_timeseries_signature())
actual <- test_tbl %>% pull(!! actual_expr)
})
@@ -235,20 +242,20 @@
Workflow for Cleaning Anomalieslubridate_anomalized_tbl
#> # A time tibble: 425 × 9
#> # Index: date
-#> date observed season trend remainder remainde…¹ remai…² anomaly obser…³
-#> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
-#> 1 2017-01-01 643 -2078. 2474. 246. -3323. 3310. No 643
-#> 2 2017-01-02 1350 518. 2491. -1659. -3323. 3310. No 1350
-#> 3 2017-01-03 2940 1117. 2508. -685. -3323. 3310. No 2940
-#> 4 2017-01-04 4269 1220. 2524. 525. -3323. 3310. No 4269
-#> 5 2017-01-05 3724 865. 2541. 318. -3323. 3310. No 3724
-#> 6 2017-01-06 2326 356. 2558. -588. -3323. 3310. No 2326
-#> 7 2017-01-07 1107 -1998. 2574. 531. -3323. 3310. No 1107
-#> 8 2017-01-08 1058 -2078. 2591. 545. -3323. 3310. No 1058
-#> 9 2017-01-09 2494 518. 2608. -632. -3323. 3310. No 2494
-#> 10 2017-01-10 3237 1117. 2624. -504. -3323. 3310. No 3237
-#> # … with 415 more rows, and abbreviated variable names ¹remainder_l1,
-#> # ²remainder_l2, ³observed_cleaned
Before Cleaning with anomalize
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 95f7bae..2e2cd9b 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -23,7 +23,7 @@
Citation
@Manual{,
title = {anomalize: Tidy Anomaly Detection},
author = {Matt Dancho and Davis Vaughan},
year = {2023},
- note = {R package version 0.2.3},
+ note = {R package version 0.3.0},
url = {https://github.com/business-science/anomalize},
}
diff --git a/docs/index.html b/docs/index.html
index ce519c0..384188c 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -50,7 +50,7 @@
@@ -105,9 +105,22 @@
anomalize
+
-
+anomalize
package functionality has been superseded by timetk
. We suggest you begin to use the timetk::anomalize()
to benefit from enhanced functionality and future improvements. Learn more about Anomaly Detection with timetk
here.anomalize
package functionality will be maintained for previous code bases that use the legacy functionality.timetk
functionality from conflicting with old anomalize
code, use these lines:
+
library(anomalize)
+
+anomalize <- anomalize::anomalize
+plot_anomalies <- anomalize::plot_anomalies
anomalize
+
+
@@ -122,7 +135,7 @@ Anomalize In 2 Minutes (YouTube)Installation
devtools
or the most recent CRAN version with install.packages()
:
+
# devtools::install_github("business-science/anomalize")
install.packages("anomalize")
How It WorksGetting Started
tidyverse
and anomalize
packages.
+
+library(anomalize)
+
+# NOTE: timetk now has anomaly detection built in, which
+# will get the new functionality going forward.
+# Use this script to prevent overwriting legacy anomalize:
+
+anomalize <- anomalize::anomalize
+plot_anomalies <- anomalize::plot_anomalies
anomalize
ships with a data set called tidyverse_cran_downloads
that contains the daily CRAN download counts for 15 “tidy” packages from 2017-01-01 to 2018-03-01.time_decompose()
, anomalize()
, and time_recompose()
) along with a visualization function, plot_anomalies()
.
+
tidyverse_cran_downloads %>%
# Data Manipulation / Anomaly Detection
time_decompose(count, method = "stl") %>%
@@ -164,7 +184,7 @@
Getting StartedReducing Forecast Error by 32%
clean_anomalies()
, that can be used to repair time series prior to forecasting. We have a brand new vignette - Reduce Forecast Error (by 32%) with Cleaned Anomalies.
+
tidyverse_cran_downloads %>%
filter(package == "lubridate") %>%
ungroup() %>%
@@ -176,8 +196,8 @@
Reducing Forecast Error by 32%select(date, anomaly, observed, observed_cleaned) %>%
filter(anomaly == "Yes")
-#> # A time tibble: 19 x 4
-#> # Index: date
+#> # A time tibble: 19 × 4
+#> # Index: date
#> date anomaly observed observed_cleaned
#> <date> <chr> <dbl> <dbl>
#> 1 2017-01-12 Yes -1.14e-13 3522.
@@ -196,9 +216,9 @@
Reducing Forecast Error by 32%#> 14 2018-02-07 Yes 1.19e+ 4 8539.
#> 15 2018-02-08 Yes 1.17e+ 4 8237.
#> 16 2018-02-09 Yes -5.68e-14 7780.
-#> 17 2018-02-10 Yes 0. 5478.
+#> 17 2018-02-10 Yes 0 5478.
#> 18 2018-02-23 Yes -5.68e-14 8519.
-#> 19 2018-02-24 Yes 0. 6218.
But Wait, There’s More!
@@ -208,7 +228,7 @@
But Wait, There’s More!
plot_anomaly_decomposition()
for visualizing the inner workings of how the algorithm detects anomalies in the “remainder”.
-
+
tidyverse_cran_downloads %>%
filter(package == "lubridate") %>%
ungroup() %>%
@@ -235,19 +255,8 @@
Interested in Learning Anomaly
-
-
-anomalize
-
-
-H2O
Machine Learninganomalize
H2O
Machine LearningChangelog
Source: NEWS.md
anomalize 0.3.0
+timetk
. Note that anomalize
R package will be maintained for backwards compatibility. Users may wish to add these 2 lines of code to existing codebases that use the legacy anomalize R package:
+
+library(anomalize)
+
+anomalize <- anomalize::anomalize
+plot_anomalies <- anomalize::plot_anomalies
anomalize 0.2.22020-10-20
See also
Examples
-
+
if (FALSE) {
library(dplyr)
-#>
-#> Attaching package: ‘dplyr’
-#> The following objects are masked from ‘package:stats’:
-#>
-#> filter, lag
-#> The following objects are masked from ‘package:base’:
-#>
-#> intersect, setdiff, setequal, union
# Needed to pass CRAN check / This is loaded by default
set_time_scale_template(time_scale_template())
@@ -198,24 +190,7 @@
Examples
tidyverse_cran_downloads %>%
time_decompose(count, method = "stl") %>%
anomalize(remainder, method = "iqr")
-#> # A time tibble: 6,375 × 9
-#> # Index: date
-#> # Groups: package [15]
-#> package date observed season trend remainder remainde…¹ remai…² anomaly
-#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
-#> 1 broom 2017-01-01 1053 -1007. 1708. 352. -1725. 1704. No
-#> 2 broom 2017-01-02 1481 340. 1731. -589. -1725. 1704. No
-#> 3 broom 2017-01-03 1851 563. 1753. -465. -1725. 1704. No
-#> 4 broom 2017-01-04 1947 526. 1775. -354. -1725. 1704. No
-#> 5 broom 2017-01-05 1927 430. 1798. -301. -1725. 1704. No
-#> 6 broom 2017-01-06 1948 136. 1820. -8.11 -1725. 1704. No
-#> 7 broom 2017-01-07 1542 -988. 1842. 688. -1725. 1704. No
-#> 8 broom 2017-01-08 1479 -1007. 1864. 622. -1725. 1704. No
-#> 9 broom 2017-01-09 2057 340. 1887. -169. -1725. 1704. No
-#> 10 broom 2017-01-10 2278 563. 1909. -194. -1725. 1704. No
-#> # … with 6,365 more rows, and abbreviated variable names ¹remainder_l1,
-#> # ²remainder_l2
-
+}
See also
Examples
+if (FALSE) {
library(dplyr)
# Needed to pass CRAN check / This is loaded by default
@@ -122,24 +123,7 @@
Examples
time_decompose(count, method = "stl") %>%
anomalize(remainder, method = "iqr") %>%
clean_anomalies()
-#> # A time tibble: 6,375 × 10
-#> # Index: date
-#> # Groups: package [15]
-#> package date observed season trend remainder remainde…¹ remai…² anomaly
-#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
-#> 1 broom 2017-01-01 1053 -1007. 1708. 352. -1725. 1704. No
-#> 2 broom 2017-01-02 1481 340. 1731. -589. -1725. 1704. No
-#> 3 broom 2017-01-03 1851 563. 1753. -465. -1725. 1704. No
-#> 4 broom 2017-01-04 1947 526. 1775. -354. -1725. 1704. No
-#> 5 broom 2017-01-05 1927 430. 1798. -301. -1725. 1704. No
-#> 6 broom 2017-01-06 1948 136. 1820. -8.11 -1725. 1704. No
-#> 7 broom 2017-01-07 1542 -988. 1842. 688. -1725. 1704. No
-#> 8 broom 2017-01-08 1479 -1007. 1864. 622. -1725. 1704. No
-#> 9 broom 2017-01-09 2057 340. 1887. -169. -1725. 1704. No
-#> 10 broom 2017-01-10 2278 563. 1909. -194. -1725. 1704. No
-#> # … with 6,365 more rows, 1 more variable: observed_cleaned <dbl>, and
-#> # abbreviated variable names ¹remainder_l1, ²remainder_l2
-
+}
See also
Examples
library(dplyr)
+#>
+#> Attaching package: ‘dplyr’
+#> The following objects are masked from ‘package:stats’:
+#>
+#> filter, lag
+#> The following objects are masked from ‘package:base’:
+#>
+#> intersect, setdiff, setequal, union
tidyverse_cran_downloads %>%
ungroup() %>%
@@ -158,7 +166,7 @@
Examples
#> 8 2017-01-08 32 -19.8 27.6 24.2
#> 9 2017-01-09 70 12.4 27.6 30.0
#> 10 2017-01-10 33 11.3 27.6 -5.95
-#> # … with 415 more rows
+#> # ℹ 415 more rows
General