Skip to content

Crude hack to introduce type coercion for hash join keys #1180

Crude hack to introduce type coercion for hash join keys

Crude hack to introduce type coercion for hash join keys #1180

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: Rust
on:
# always trigger
push:
pull_request:
jobs:
# build the library, a compilation step used by multiple steps below
linux-build-lib:
name: Build Libraries on AMD64 Rust ${{ matrix.rust }}
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [nightly-2022-09-22]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v2
- name: Cache Cargo
uses: actions/cache@v2
with:
# these represent dependencies downloaded by cargo
# and thus do not depend on the OS, arch nor rust version.
path: /github/home/.cargo
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v2
with:
# these represent compiled steps of both dependencies and arrow
# and thus are specific for a particular OS, arch and rust version.
path: /github/home/target
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Build workspace in debug mode
run: |
cargo build
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target/debug"
- name: Build workspace in release mode
run: |
cargo check --release
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target/release"
- name: Check DataFusion builds without default features
run: |
cargo check --no-default-features -p datafusion
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
- name: Check DataFusion CLI builds
run: |
(cd datafusion-cli && cargo check)
(cd datafusion-cli && cargo check --no-default-features)
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
# test the crate
linux-test:
name: Test Workspace on AMD64 Rust ${{ matrix.rust }}
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [nightly-2022-09-22]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v2
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Run tests
run: |
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
# test datafusion examples
cd datafusion-examples
cargo test --no-default-features
cargo run --example csv_sql
cargo run --example parquet_sql
cargo run --example avro_sql --features=datafusion/avro
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
integration-test:
name: "Integration Test"
needs: [linux-build-lib]
runs-on: ubuntu-latest
services:
postgres:
image: postgres:14
env:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: db_test
ports:
- 5432/tcp
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Setup toolchain
run: |
rustup toolchain install nightly-2022-09-22
rustup default nightly-2022-09-22
- uses: actions/setup-python@v2
with:
python-version: "3.10"
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
python -m pip install -r integration-tests/requirements.txt
- name: Allow access of psql
run: |
# make sure psql can access the server
echo "$POSTGRES_HOST:$POSTGRES_PORT:$POSTGRES_DB:$POSTGRES_USER:$POSTGRES_PASSWORD" | tee ~/.pgpass
chmod 0600 ~/.pgpass
psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c 'CREATE TABLE IF NOT EXISTS test (
c1 character varying NOT NULL,
c2 integer NOT NULL,
c3 smallint NOT NULL,
c4 smallint NOT NULL,
c5 integer NOT NULL,
c6 bigint NOT NULL,
c7 smallint NOT NULL,
c8 integer NOT NULL,
c9 bigint NOT NULL,
c10 character varying NOT NULL,
c11 double precision NOT NULL,
c12 double precision NOT NULL,
c13 character varying NOT NULL
);'
psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c "\copy test FROM '$(pwd)/testing/data/csv/aggregate_test_100.csv' WITH (FORMAT csv, HEADER true);"
env:
POSTGRES_HOST: localhost
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
POSTGRES_DB: db_test
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
- name: Build datafusion-cli
run: (cd datafusion-cli && cargo build)
- name: Test Psql Parity
run: python -m pytest -v integration-tests/test_psql_parity.py
env:
POSTGRES_HOST: localhost
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
POSTGRES_DB: db_test
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
windows-and-macos:
name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, macos-latest]
rust: [nightly-2022-09-22]
steps:
- uses: actions/checkout@v2
with:
submodules: true
# TODO: this won't cache anything, which is expensive. Setup this action
# with a OS-dependent path.
- name: Setup Rust toolchain
run: |
rustup toolchain install ${{ matrix.rust }}
rustup default ${{ matrix.rust }}
rustup component add rustfmt
- name: Run tests
shell: bash
run: |
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
cargo test
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
# FIXME: pyarrow test fails, possibly due to dated rust-toolchain.
# test-datafusion-pyarrow:
# needs: [linux-build-lib]
# runs-on: ubuntu-20.04
# strategy:
# matrix:
# arch: [amd64]
# rust: [nightly-2022-03-22]
# container:
# image: ${{ matrix.arch }}/rust
# env:
# # Disable full debug symbol generation to speed up CI build and keep memory down
# # "1" means line tables only, which is useful for panic tracebacks.
# RUSTFLAGS: "-C debuginfo=1"
# steps:
# - uses: actions/checkout@v2
# with:
# submodules: true
# - name: Cache Cargo
# uses: actions/cache@v2
# with:
# path: /github/home/.cargo
# # this key equals the ones on `linux-build-lib` for re-use
# key: cargo-cache-
# - name: Cache Rust dependencies
# uses: actions/cache@v2
# with:
# path: /github/home/target
# # this key equals the ones on `linux-build-lib` for re-use
# key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
# - uses: actions/setup-python@v2
# with:
# python-version: "3.8"
# - name: Install PyArrow
# run: |
# echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV
# python -m pip install pyarrow
# - name: Setup Rust toolchain
# run: |
# rustup toolchain install ${{ matrix.rust }}
# rustup default ${{ matrix.rust }}
# rustup component add rustfmt
# - name: Run tests
# run: |
# cd datafusion
# cargo test --features=pyarrow
# env:
# CARGO_HOME: "/github/home/.cargo"
# CARGO_TARGET_DIR: "/github/home/target"
lint:
name: Lint
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v2
- name: Setup toolchain
run: |
rustup toolchain install nightly-2022-09-22
rustup default nightly-2022-09-22
rustup component add rustfmt
- name: Run
run: cargo fmt --all -- --check
clippy:
name: Clippy
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [nightly-2022-09-22]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v2
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Install Clippy
run: |
rustup component add clippy
- name: Run clippy
run: |
cargo clippy --all-targets --workspace -- -D warnings
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
# Check answers are correct when hash values collide
hash-collisions:
name: Test Hash Collisions on AMD64 Rust ${{ matrix.rust }}
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [nightly-2022-09-22]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v2
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Maximize build space (disk space limitations)
run: |
echo "Disk Space before cleanup"
df -h
du -h --max-depth 1 / 2>/dev/null || true
du -h --max-depth 1 /__t 2>/dev/null || true
du -h --max-depth 1 /__e 2>/dev/null || true
du -h --max-depth 1 /__w 2>/dev/null || true
rm -rf /__t/CodeQL
echo "Disk Space after cleanup"
df -h
- name: Run tests
run: |
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
cd datafusion
# Force all hash values to collide
cargo test --all --features=force_hash_collisions
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
# FIXME: Cargo.toml checks are disabled for now as they are failing.
# cargo-toml-formatting-checks:
# name: Check Cargo.toml formatting
# needs: [linux-build-lib]
# runs-on: ubuntu-latest
# strategy:
# matrix:
# arch: [amd64]
# rust: [nightly-2022-03-22]
# container:
# image: ${{ matrix.arch }}/rust
# env:
# # Disable full debug symbol generation to speed up CI build and keep memory down
# # "1" means line tables only, which is useful for panic tracebacks.
# RUSTFLAGS: "-C debuginfo=1"
# steps:
# - uses: actions/checkout@v2
# with:
# submodules: true
# - name: Cache Cargo
# uses: actions/cache@v2
# with:
# path: /github/home/.cargo
# # this key equals the ones on `linux-build-lib` for re-use
# key: cargo-cache-
# - name: Cache Rust dependencies
# uses: actions/cache@v2
# with:
# path: /github/home/target
# # this key equals the ones on `linux-build-lib` for re-use
# key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
# - name: Setup Rust toolchain
# run: |
# rustup toolchain install ${{ matrix.rust }}
# rustup default ${{ matrix.rust }}
# - name: Install cargo-tomlfmt
# run: |
# which cargo-tomlfmt || cargo install cargo-tomlfmt
# env:
# CARGO_HOME: "/github/home/.cargo"
# CARGO_TARGET_DIR: "/github/home/target"
# - name: Check Cargo.toml formatting
# run: |
# # if you encounter error, try rerun the command below, finally run 'git diff' to
# # check which Cargo.toml introduces formatting violation
# #
# # ignore ./Cargo.toml because putting workspaces in multi-line lists make it easy to read
# find . -mindepth 2 -name 'Cargo.toml' -exec cargo tomlfmt -p {} \;
# git diff --exit-code
# env:
# CARGO_HOME: "/github/home/.cargo"
# CARGO_TARGET_DIR: "/github/home/target"
# Coverage job was failing. https://github.com/apache/arrow-datafusion/issues/590 tracks re-instating it
# coverage:
# name: Coverage
# runs-on: ubuntu-latest
# strategy:
# matrix:
# arch: [amd64]
# rust: [nightly-2022-03-22]
# steps:
# - uses: actions/checkout@v2
# with:
# submodules: true
# - name: Cache Cargo
# uses: actions/cache@v2
# with:
# path: /home/runner/.cargo
# # this key is not equal because the user is different than on a container (runner vs github)
# key: cargo-coverage-cache-
# - name: Cache Rust dependencies
# uses: actions/cache@v2
# with:
# path: /home/runner/target
# # this key is not equal because coverage uses different compilation flags.
# key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}-
# - name: Run coverage
# run: |
# export ARROW_TEST_DATA=$(pwd)/testing/data
# export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
# # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
# # see https://github.com/xd009642/tarpaulin/issues/618
# cargo install --version 0.16.0 cargo-tarpaulin
# cargo tarpaulin --out Xml
# env:
# CARGO_HOME: "/home/runner/.cargo"
# CARGO_TARGET_DIR: "/home/runner/target"
# - name: Report coverage
# continue-on-error: true
# run: bash <(curl -s https://codecov.io/bash)