Skip to content

Commit

Permalink
Merge pull request #151 from ssi-dk/restore-main
Browse files Browse the repository at this point in the history
Restore `main` after release of v0.4.1
  • Loading branch information
RasmusSkytte authored Oct 4, 2024
2 parents f27cc9c + 8066e29 commit 75a17b8
Show file tree
Hide file tree
Showing 19 changed files with 1,256 additions and 187 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ man-roxygen/*
^Meta$
^README.Rmd$
^revdep$
^data-raw$
225 changes: 225 additions & 0 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# Benchmark workflow: runs the package benchmark suite against four database
# backends (SQLite, DuckDB, PostgreSQL, SQL Server), combines the per-backend
# results into a single plot, and commits the updated benchmark data back to
# the branch. Triggered manually only.
on:
  workflow_dispatch


name: "⏱️ Benchmark"
jobs:
  benchmark:
    runs-on: ubuntu-latest

    # PostgreSQL runs as a service container; SQL Server is installed on the
    # runner itself by the potatoqualitee/mssqlsuite step below.
    services:
      postgres:
        image: postgres:latest
        env:
          POSTGRES_DB: test
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
        ports:
          - 5432:5432
        options: --health-cmd "pg_isready -U postgres" --health-interval 10s --health-timeout 5s --health-retries 5

    # Connection settings picked up by RPostgres::Postgres() during the
    # PostgreSQL benchmark run.
    env:
      PGHOST: localhost
      PGPORT: 5432
      PGDATABASE: test
      PGUSER: postgres
      PGPASSWORD: postgres

    steps:
      - name: Install a SQL Server suite of tools
        uses: potatoqualitee/[email protected]
        with:
          install: sqlengine, sqlpackage, sqlclient
          show-log: true

      - name: Configure SQL server
        run: |
          set -o xtrace
          sqlcmd -V 10 -S localhost -U SA -P dbatools.I0 -Q "ALTER LOGIN SA WITH DEFAULT_DATABASE = master;"

      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          # Credentials are configured explicitly in the push step below.
          persist-credentials: false

      - name: Configure git
        run: |
          git config --local user.name "$GITHUB_ACTOR"
          git config --local user.email "[email protected]"
          git switch ${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::devtools


      - name: Delete previous benchmark files
        if: always()
        run: rm -rf inst/extdata/benchmark-*.rds



      # Determine backend versions up front so each benchmark run can be
      # labelled with the exact engine version it was measured against.
      - name: Get SQLite version
        run: |
          version=$(Rscript -e "cat(DBI::dbGetQuery(DBI::dbConnect(RSQLite::SQLite()), 'SELECT sqlite_version();')[[1]])")
          echo "SQLITE_VERSION=SQLite v$version" >> $GITHUB_ENV

      - name: Get DuckDB version
        run: |
          version=$(Rscript -e "cat(DBI::dbGetQuery(DBI::dbConnect(duckdb::duckdb()), 'SELECT version();')[[1]])")
          echo "DUCKDB_VERSION=DuckDB $version" >> $GITHUB_ENV

      - name: Get PostgreSQL version
        run: |
          version=$(psql --version | awk '{print $3}')
          echo "POSTGRES_VERSION=PostgreSQL v$version" >> $GITHUB_ENV

      - name: Get SQL Server version
        run: |
          version=$(sqlcmd -S localhost -U SA -P dbatools.I0 -Q "SET NOCOUNT ON; SELECT SERVERPROPERTY('productversion') AS version" -h -1 -W -b)
          echo "SQL_SERVER_VERSION=SQL Server v$version" >> $GITHUB_ENV

      # First invocation of data-raw/benchmark.R with no BACKEND set installs
      # the library versions to benchmark (CRAN, main, ...).
      - name: Install libraries to benchmark
        if: always()
        run: source("./data-raw/benchmark.R", echo=TRUE)
        shell: Rscript {0}



      - name: Run benchmark (${{ env.SQLITE_VERSION }})
        if: always()
        env:
          BACKEND: ${{ env.SQLITE_VERSION }}
          BACKEND_DRV: RSQLite::SQLite
          BACKEND_ARGS: 'list(dbname = file.path(tempdir(), "SQLite.SQLite"))'
        run: source("./data-raw/benchmark.R", echo=TRUE)
        shell: Rscript {0}

      - name: Run benchmark (${{ env.DUCKDB_VERSION }})
        if: always()
        env:
          BACKEND: ${{ env.DUCKDB_VERSION }}
          BACKEND_DRV: duckdb::duckdb
          BACKEND_ARGS: 'list(dbdir = file.path(tempdir(), "DuckDB.duckdb"))'
        run: source("./data-raw/benchmark.R", echo=TRUE)
        shell: Rscript {0}

      - name: Run benchmark (${{ env.POSTGRES_VERSION }})
        if: always()
        env:
          BACKEND: ${{ env.POSTGRES_VERSION }}
          BACKEND_DRV: RPostgres::Postgres
        run: source("./data-raw/benchmark.R", echo=TRUE)
        shell: Rscript {0}

      - name: Run benchmark (${{ env.SQL_SERVER_VERSION }})
        if: always()
        env:
          BACKEND: ${{ env.SQL_SERVER_VERSION }}
          BACKEND_DRV: odbc::odbc
          CONN_ARGS_JSON: >
            {
              "${{ env.SQL_SERVER_VERSION }}": {
                "driver": "ODBC Driver 17 for SQL Server",
                "server": "localhost",
                "database": "master",
                "UID": "SA",
                "PWD": "dbatools.I0"
              }
            }
        run: source("./data-raw/benchmark.R", echo=TRUE)
        shell: Rscript {0}



      - name: Display structure of benchmark files
        if: always()
        run: ls -R data

      - name: Combine benchmark results
        if: always()
        run: |
          # Collect the per-backend benchmark-*.rds files written by the runs above
          benchmark_files <- list.files(
            "data",
            pattern = "^benchmark-",
            full.names = TRUE,
            recursive = TRUE
          )

          benchmarks <- benchmark_files |>
            purrr::map(readRDS) |>
            purrr::map(tibble::as_tibble) |>
            purrr::reduce(rbind)

          # Order versions so "CRAN" and "main" come first on the x-axis
          benchmarks <- benchmarks |>
            dplyr::mutate(
              "version" = factor(
                .data$version,
                levels = c("CRAN", "main", setdiff(unique(benchmarks$version), c("CRAN", "main")))
              )
            )

          # Save the combined benchmark results and delete the individual files
          dir.create(file.path("inst", "extdata"), recursive = TRUE, showWarnings = FALSE)
          saveRDS(benchmarks, file.path("inst", "extdata", "benchmarks.rds"))
          file.remove(benchmark_files)

          # Add asterisk note for slow backends (those with fewer data points than the rest)
          slow_backends <- benchmarks |>
            dplyr::distinct(.data$database, .data$n) |>
            dplyr::filter(.data$n < max(.data$n)) |>
            dplyr::pull("database")

          benchmarks <- benchmarks |>
            dplyr::mutate("database" = paste0(.data$database, ifelse(.data$database %in% slow_backends, "*", "")))

          # Mean and standard deviation (see ggplot2::mean_se())
          mean_sd <- function(x) {
            mu <- mean(x)
            sd <- sd(x)
            data.frame(y = mu, ymin = mu - sd, ymax = mu + sd)
          }

          g <- ggplot2::ggplot(
            benchmarks,
            ggplot2::aes(x = version, y = time / 1e9)
          ) +
            ggplot2::stat_summary(fun.data = mean_sd, geom = "pointrange", size = 0.5, linewidth = 1) +
            ggplot2::facet_grid(rows = ggplot2::vars(benchmark_function), cols = ggplot2::vars(database)) +
            ggplot2::labs(x = "Codebase version", y = "Time (s)")

          # The caption must appear whenever ANY backend is starred (was `> 1`,
          # which omitted the explanation when exactly one backend was slow)
          if (length(slow_backends) > 0) {
            g <- g + ggplot2::labs(caption = "* IMPORTANT: Benchmark data halved for this backend!")
          }

          # Pass the plot explicitly: ggsave() defaults to last_plot(), which is
          # the last plot *displayed* -- `g` was never printed, so relying on the
          # default would not save the plot built above
          ggplot2::ggsave("benchmarks.pdf", plot = g)
        shell: Rscript {0}

      - name: Upload benchmark summary
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-summary
          path: benchmarks.pdf

      # Rebase onto the latest branch tip before committing, stashing any
      # generated files so the rebase cannot conflict with them.
      - name: Commit and push changes
        run: |
          git remote set-url origin https://$GITHUB_ACTOR:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY.git
          git stash --include-untracked
          git pull --rebase origin ${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
          git stash list | grep stash@{0} && git stash pop || echo "No stash to pop"
          git add inst/extdata/\*
          git commit -m "chore: Update benchmark data" || echo "No changes to commit"
          git push origin ${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
8 changes: 7 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SCDB
Type: Package
Title: Easily Access and Maintain Time-Based Versioned Data (Slowly-Changing-Dimension)
Version: 0.4.1
Version: 0.4.1.9000
Authors@R:
c(person("Rasmus Skytte", "Randl\U00F8v", , "[email protected]",
role = c("aut", "cre", "rev"),
Expand All @@ -21,6 +21,8 @@ License: GPL-3
Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE, r6 = TRUE)
Depends:
R (>= 3.5.0)
Imports:
checkmate,
DBI,
Expand All @@ -42,10 +44,14 @@ Suggests:
callr,
conflicted,
duckdb,
ggplot2,
here,
jsonlite,
knitr,
lintr,
microbenchmark,
odbc,
pak,
rmarkdown,
roxygen2,
pkgdown,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(as.character,Id)
S3method(create_index,DBIConnection)
S3method(create_index,PqConnection)
S3method(create_index,SQLiteConnection)
S3method(db_timestamp,"NULL")
S3method(db_timestamp,SQLiteConnection)
S3method(db_timestamp,default)
Expand Down Expand Up @@ -54,6 +57,7 @@ S3method(tidyr::unite,tbl_dbi)
export(Logger)
export(LoggerNull)
export(close_connection)
export(create_index)
export(create_logs_if_missing)
export(create_table)
export(db_timestamp)
Expand Down
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# SCDB (development version)

## New features

* Added function `create_index` to allow easy creation of an index on a table (#137).

## Improvements and Fixes

* `update_snapshot()` has been optimized and now runs faster on all the supported backends (#137).

## Documentation

* A vignette including benchmarks of `update_snapshot()` across various backends has been added (#138).


# SCDB 0.4.1

## Improvements and Fixes
Expand All @@ -14,6 +29,7 @@

* Improved tests for `get_tables()` (#145).


# SCDB 0.4.0

## BREAKING CHANGES:
Expand Down
8 changes: 8 additions & 0 deletions R/connection.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,14 @@ get_connection.OdbcDriver <- function(
checkmate::assert_choice(timezone_out, OlsonNames(), null.ok = TRUE, add = coll)
checkmate::reportAssertions(coll)

# Recommend batch processing for ODBC connections
if (is.null(getOption("odbc.batch_rows"))) {
message(
"Transfer of large data sets may be slow. ",
"Consider using options(\"odbc.batch_rows\" = 1000) to speed up transfer."
)
}

# Check if connection can be established given these settings
status <- do.call(DBI::dbCanConnect, args = args)
if (!status) stop(attr(status, "reason"))
Expand Down
Loading

0 comments on commit 75a17b8

Please sign in to comment.