From 82170201c7c815c2ac24a6914f38dc65c1222dc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rasmus=20Skytte=20Randl=C3=B8v?= Date: Thu, 7 Mar 2024 14:56:13 +0100 Subject: [PATCH] temp: Remove benchmark vignette --- vignettes/benchmark.Rmd | 114 ---------------------------------------- 1 file changed, 114 deletions(-) delete mode 100644 vignettes/benchmark.Rmd diff --git a/vignettes/benchmark.Rmd b/vignettes/benchmark.Rmd deleted file mode 100644 index 9321c3e4..00000000 --- a/vignettes/benchmark.Rmd +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: "Benchmark" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Benchmark} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -suppressPackageStartupMessages(library(SCDB)) -``` - -```{r benchmark_conns} -# Define list of connections to check -conn_list <- list( - # Backend string = package::function - "SQLite" = "RSQLite::SQLite", - "PostgreSQL" = "RPostgres::Postgres" -) - -get_driver <- function(x = character(), ...) { - if (!grepl(".*::.*", x)) stop("Package must be specified with namespace (e.g. RSQLite::SQLite)!\n", - "Received: ", x) - parts <- strsplit(x, "::")[[1]] - - # Skip unavailable packages - if (!requireNamespace(parts[1], quietly = TRUE)) { - return() - } - - drv <- getExportedValue(parts[1], parts[2]) - - tryCatch(suppressWarnings(get_connection(drv = drv(), ...)), # We expect a warning if no tables are found - error = function(e) { - NULL # Return NULL, if we cannot connect - }) -} - -conns <- lapply(conn_list, get_driver) |> - unlist() - -``` - -```{r benchmark_data} -# Our benchmark data is the iris data set but repeated to increase the data size -data_generator <- function(repeats) { - purrr::map(seq(repeats), - \(it) dplyr::mutate(iris, r = dplyr::row_number() + (it - 1) * nrow(iris))) |> - purrr::reduce(rbind) -} - -data_1 <- data_generator(20) -data_2 <- data_generator(40) |> - dplyr::mutate(Sepal.Length = dplyr::if_else(Sepal.Length > median(Sepal.Length), - Sepal.Length, Sepal.Length / 2)) -data_3 <- data_generator(60) |> - dplyr::mutate(Sepal.Length = dplyr::if_else(Sepal.Length > median(Sepal.Length), - Sepal.Length, Sepal.Length / 2), - Sepal.Width = dplyr::if_else(Sepal.Width > median(Sepal.Width), - Sepal.Width, Sepal.Width / 2)) - -# copy data to the conns -data_on_conns <- purrr::map(conns, ~ { - list(dplyr::copy_to(., data_1, name = "SCDB_data_1", overwrite = TRUE), - dplyr::copy_to(., data_2, name = "SCDB_data_2", overwrite = TRUE), - dplyr::copy_to(., data_3, name = "SCDB_data_3", overwrite = TRUE)) -}) -``` - -```{r benchmark_functions} -# Define the data to loop over for benchmark -ts <- list("2021-01-01", "2021-01-02", "2021-01-03") - -# Define the SCDB update functions -scdb_update_step <- function(conn, data, ts) { - update_snapshot(data, conn, "SCDB_benchmark", timestamp = ts, - logger = Logger$new(output_to_console = FALSE, warn = FALSE)) -} - -scdb_updates <- function(conn, data_on_conn) { - purrr::walk2(data_on_conn, ts, \(data, ts) scdb_update_step(conn, data, ts)) - DBI::dbRemoveTable(conn, name = "SCDB_benchmark") -} - -# Construct the list of benchmarks -benchmark_exprs <- alist() -for (benchmark_id in seq_along(conns)) { - benchmark_on_conn <- local(bquote(scdb_updates(conns[[benchmark_id]], data_on_conns[[benchmark_id]]))) - benchmark_exprs <- append(benchmark_exprs, benchmark_on_conn) -} -names(benchmark_exprs) <- names(conns) - -res <- microbenchmark::microbenchmark(list = benchmark_exprs, - times = 10) - -print(res) -``` - -```{r cleanup} -# remove data from the conns -purrr::walk(conns, ~ { - DBI::dbRemoveTable(., name = "SCDB_data_1") - DBI::dbRemoveTable(., name = "SCDB_data_2") - DBI::dbRemoveTable(., name = "SCDB_data_3") -}) -```