Skip to content

Upgrade DataFusion from 31.0.0 to 40.1.0 #163

Upgrade DataFusion from 31.0.0 to 40.1.0

Upgrade DataFusion from 31.0.0 to 40.1.0 #163

Workflow file for this run

# Regression workflow.
#
# For every benchmark solution (one job per solution, then one job with all
# solutions installed together) this workflow installs the solution, runs the
# mini GroupBy and Join benchmarks, validates the results, and uploads the
# contents of out/ plus the top-level *.csv files as a zip artifact.
name: Regression

on:
  workflow_dispatch:
  repository_dispatch:
  pull_request:
    # Ignore Markdown and workflow files, but the trailing `!` pattern
    # re-includes this workflow file itself.
    paths-ignore:
      - '**.md'
      - '.github/workflows/**'
      - '!.github/workflows/Regression.yml'

# A newer run cancels an in-progress run of the same group. The last term of
# the group key evaluates to the commit SHA on refs/heads/master and to `true`
# on every other ref, so each master commit gets a group of its own.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/master' || github.sha }}
  cancel-in-progress: true

jobs:
  # One job per solution; each installs and benchmarks a single solution.
  regression-test-benchmark-runner-solo-solutions:
    name: Regression Tests solo solutions
    # NOTE(review): GitHub appears to have retired the hosted ubuntu-20.04
    # image. Confirm that _utils/setup-small.sh and the gcc-10/g++-10 pins
    # below work on a newer image before changing this label.
    runs-on: ubuntu-20.04
    strategy:
      # Let the remaining solutions finish even when one of them fails.
      fail-fast: false
      matrix:
        solution:
          - data.table
          - collapse
          - dplyr
          - pandas
          - pydatatable
          - spark
          - juliadf
          - juliads
          - polars
          - R-arrow
          - duckdb
          - datafusion
          - dask
          - clickhouse
    # Exported to every step of the job.
    env:
      CC: gcc-10
      CXX: g++-10
      GEN: ninja
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history instead of the default shallow clone.
          fetch-depth: 0
      - uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install libraries
        shell: bash
        run: ./_utils/setup-small.sh
      - name: Generate 500mb datasets
        shell: bash
        run: ./_utils/generate-data-small.sh
      # -f: do not fail the job when a log file is already absent.
      - name: Remove old logs
        shell: bash
        run: rm -f time.csv logs.csv
      - name: Install all solutions
        shell: bash
        run: source path.env && python3 _utils/install_all_solutions.py ${{ matrix.solution }}
      - name: Turn swap off
        shell: bash
        run: sudo swapoff -a
      # Needed because clickhouse, for some reason, produces an error the
      # first time a benchmark is run. The next benchmark run works and
      # overwrites the old benchmark files.
      - name: Run mini GroupBy benchmark if clickhouse
        shell: bash
        if: matrix.solution == 'clickhouse'
        run: |
          python3 _utils/prep_solutions.py --task=groupby --solution=clickhouse
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Run mini GroupBy benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=groupby --solution=${{ matrix.solution }}
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Run mini Join benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=join --solution=${{ matrix.solution }}
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Validate benchmark results and report generation
        shell: bash
        run: ./_utils/validate_no_errors.sh
      # Runs even after a failure so the logs can still be inspected.
      # mkdir -p guards against out/ not existing when an earlier step failed
      # (presumably out/ is otherwise created by the benchmark run - confirm).
      - name: Create Archive
        if: always()
        shell: bash
        run: |
          mkdir -p out
          cp *.csv out/
          zip -r ${{ matrix.solution }}-out.zip out/
      # Include this step to see what the latest versions are of every solution.
      - name: Print latest versions
        if: always()
        shell: bash
        run: tail -n +1 */VERSION
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: ${{ matrix.solution }}-out.zip
          path: ${{ matrix.solution }}-out.zip
          if-no-files-found: error
          # Replace an artifact of the same name instead of failing.
          overwrite: true

  # Single job with every solution installed side by side; starts only after
  # all solo jobs above have succeeded.
  regression-test-benchmark-runner-all-solutions:
    needs: regression-test-benchmark-runner-solo-solutions
    name: Regression Tests all solutions
    # NOTE(review): same runner-image caveat as the solo job above.
    runs-on: ubuntu-20.04
    # Exported to every step of the job.
    env:
      CC: gcc-10
      CXX: g++-10
      GEN: ninja
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history instead of the default shallow clone.
          fetch-depth: 0
      - uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install libraries
        shell: bash
        run: ./_utils/setup-small.sh
      - name: Generate 500mb datasets
        shell: bash
        run: ./_utils/generate-data-small.sh
      # -f: do not fail the job when a log file is already absent.
      - name: Remove old logs
        shell: bash
        run: rm -f time.csv logs.csv
      - name: Install all solutions
        shell: bash
        run: source path.env && python3 _utils/install_all_solutions.py all
      - name: Turn swap off
        shell: bash
        run: sudo swapoff -a
      - name: Run mini GroupBy benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=groupby --solution=all
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Run mini Join benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=join --solution=all
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Validate benchmark results and report generation
        shell: bash
        run: ./_utils/validate_no_errors.sh
      # Runs even after a failure so the logs can still be inspected.
      # mkdir -p guards against out/ not existing when an earlier step failed
      # (presumably out/ is otherwise created by the benchmark run - confirm).
      - name: Create Archive
        if: always()
        shell: bash
        run: |
          mkdir -p out
          cp *.csv out/
          zip -r all-out.zip out/
      # Include this step to see what the latest versions are of every solution.
      - name: Print latest versions
        if: always()
        shell: bash
        run: tail -n +1 */VERSION
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: all-out.zip
          path: all-out.zip
          if-no-files-found: error
          # Replace an artifact of the same name instead of failing.
          overwrite: true