Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into alamb/sort-merge-acco…
Browse files Browse the repository at this point in the history
…unting
  • Loading branch information
alamb committed Jul 28, 2023
2 parents 4e23ba6 + 55930fb commit 579cbd2
Show file tree
Hide file tree
Showing 574 changed files with 61,640 additions and 32,637 deletions.
6 changes: 4 additions & 2 deletions .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
# https://cwiki.apache.org/confluence/display/INFRA/Git+-+.asf.yaml+features

notifications:
commits: [email protected]
issues: [email protected]
commits: [email protected]
issues: [email protected]
pullrequests: [email protected]
jira_options: link label worklog
github:
Expand All @@ -44,6 +44,8 @@ github:
rebase: false
features:
issues: true
protected_branches:
main: { }

# publishes the content of the `asf-site` branch to
# https://arrow.apache.org/datafusion/
Expand Down
5 changes: 5 additions & 0 deletions .github/actions/setup-builder/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ runs:
# "1" means line tables only, which is useful for panic tracebacks.
shell: bash
run: echo "RUSTFLAGS=-C debuginfo=1" >> $GITHUB_ENV
- name: Disable incremental compilation
# Disable incremental compilation to save disk space (the CI doesn't recompile modified files)
# https://github.com/apache/arrow-datafusion/issues/6676
shell: bash
run: echo "CARGO_INCREMENTAL=0" >> $GITHUB_ENV
- name: Enable backtraces
shell: bash
run: echo "RUST_BACKTRACE=1" >> $GITHUB_ENV
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
github.event_name == 'pull_request_target' &&
(github.event.action == 'opened' ||
github.event.action == 'synchronize')
uses: actions/labeler@4.1.0
uses: actions/labeler@v4.3.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
configuration-path: .github/workflows/dev_pr/labeler.yml
Expand Down
51 changes: 43 additions & 8 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,33 @@ jobs:
- name: Verify Working Directory Clean
run: git diff --exit-code

linux-test-datafusion-cli:
name: cargo test datafusion-cli (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v3
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v3
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run tests (excluding doctests)
run: |
cd datafusion-cli
cargo test --lib --tests --bins --all-features
- name: Verify Working Directory Clean
run: git diff --exit-code

linux-test-example:
name: cargo examples (amd64)
needs: [ linux-build-lib ]
Expand Down Expand Up @@ -152,7 +179,10 @@ jobs:
rust-version: stable
# Note: this does not include dictionary_expressions to reduce codegen
- name: Run doctests
run: cargo test --doc --features avro,json
run: |
cargo test --doc --features avro,json
cd datafusion-cli
cargo test --doc --all-features
- name: Verify Working Directory Clean
run: git diff --exit-code

Expand All @@ -173,6 +203,8 @@ jobs:
run: |
export RUSTDOCFLAGS="-D warnings -A rustdoc::private-intra-doc-links"
cargo doc --document-private-items --no-deps --workspace
cd datafusion-cli
cargo doc --document-private-items --no-deps
# verify that the benchmark queries return the correct results
verify-benchmark-results:
Expand All @@ -197,18 +229,17 @@ jobs:
rust-version: stable
- name: Generate benchmark data and expected query results
run: |
mkdir -p benchmarks/data/answers
mkdir -p datafusion/core/tests/sqllogictests/test_files/tpch/data
git clone https://github.com/databricks/tpch-dbgen.git
cd tpch-dbgen
make
./dbgen -f -s 1
mv *.tbl ../benchmarks/data
mv ./answers/* ../benchmarks/data/answers/
./dbgen -f -s 0.1
mv *.tbl ../datafusion/core/tests/sqllogictests/test_files/tpch/data
- name: Verify that benchmark queries return expected results
run: |
export TPCH_DATA=`pwd`/benchmarks/data
cargo test verify_q --profile release-nonlto --features=ci -- --test-threads=1
export TPCH_DATA=`realpath datafusion/core/tests/sqllogictests/test_files/tpch/data`
cargo test serde_q --profile release-nonlto --features=ci -- --test-threads=1
INCLUDE_TPCH=true cargo test -p datafusion --test sqllogictests
- name: Verify Working Directory Clean
run: git diff --exit-code

Expand Down Expand Up @@ -272,6 +303,8 @@ jobs:
run: |
export PATH=$PATH:$HOME/d/protoc/bin
cargo test --lib --tests --bins --features avro,json,dictionary_expressions
cd datafusion-cli
cargo test --lib --tests --bins --all-features
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
Expand Down Expand Up @@ -305,6 +338,8 @@ jobs:
shell: bash
run: |
cargo test --lib --tests --bins --features avro,json,dictionary_expressions
cd datafusion-cli
cargo test --lib --tests --bins --all-features
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
Expand Down Expand Up @@ -461,7 +496,7 @@ jobs:
- name: Run tests
run: |
cd datafusion
cargo test --lib --tests --features=force_hash_collisions
cargo test --lib --tests --features=force_hash_collisions,avro
cargo-toml-formatting-checks:
name: check Cargo.toml formatting
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,6 @@ arrow-ballista

datafusion/CHANGELOG.md.bak
.githubchangeloggenerator.cache*

# Generated tpch data
datafusion/core/tests/sqllogictests/test_files/tpch/data/*
42 changes: 15 additions & 27 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,40 +17,28 @@

[workspace]
exclude = ["datafusion-cli"]
members = [
"datafusion/common",
"datafusion/core",
"datafusion/expr",
"datafusion/execution",
"datafusion/optimizer",
"datafusion/physical-expr",
"datafusion/proto",
"datafusion/proto/gen",
"datafusion/row",
"datafusion/sql",
"datafusion/substrait",
"datafusion-examples",
"test-utils",
"benchmarks",
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/substrait", "datafusion-examples", "test-utils", "benchmarks",
]
resolver = "2"

[workspace.package]
version = "24.0.0"
edition = "2021"
readme = "README.md"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
edition = "2021"
homepage = "https://github.com/apache/arrow-datafusion"
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.64"
rust-version = "1.70"
version = "28.0.0"

[workspace.dependencies]
arrow = { version = "39.0.0", features = ["prettyprint"] }
arrow-flight = { version = "39.0.0", features = ["flight-sql-experimental"] }
arrow-buffer = { version = "39.0.0", default-features = false }
arrow-schema = { version = "39.0.0", default-features = false }
arrow-array = { version = "39.0.0", default-features = false, features = ["chrono-tz"] }
parquet = { version = "39.0.0", features = ["arrow", "async", "object_store"] }
arrow = { version = "43.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
arrow-array = { version = "43.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "43.0.0", default-features = false }
arrow-flight = { version = "43.0.0", features = ["flight-sql-experimental"] }
arrow-schema = { version = "43.0.0", default-features = false }
parquet = { version = "43.0.0", features = ["arrow", "async", "object_store"] }
sqlparser = { version = "0.36.1", features = ["visitor"] }

[profile.release]
codegen-units = 1
Expand All @@ -68,4 +56,4 @@ lto = false
opt-level = 3
overflow-checks = false
panic = 'unwind'
rpath = false
rpath = false
26 changes: 16 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,28 @@

# DataFusion

[![Coverage Status](https://codecov.io/gh/apache/arrow-datafusion/rust/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/arrow-datafusion?branch=master)

<img src="docs/source/_static/images/DataFusion-Logo-Background-White.svg" width="256" alt="logo"/>
<img src="https://arrow.apache.org/datafusion/_images/DataFusion-Logo-Background-White.png" width="256" alt="logo"/>

DataFusion is a very fast, extensible query engine for building high-quality data-centric systems in
[Rust](https://www.rust-lang.org), using the [Apache Arrow](https://arrow.apache.org)
in-memory format.
in-memory format. [Python Bindings](https://github.com/apache/arrow-datafusion-python) are also available. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community.

Here are links to some important information:

DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community.
- [Project Site](https://arrow.apache.org/datafusion)
- [Rust Getting Started](https://arrow.apache.org/datafusion/user-guide/example-usage.html)
- [Rust DataFrame API](https://arrow.apache.org/datafusion/user-guide/dataframe.html)
- [Rust API docs](https://docs.rs/datafusion/latest/datafusion)
- [Rust Examples](https://github.com/apache/arrow-datafusion/tree/master/datafusion-examples)
- [Python DataFrame API](https://arrow.apache.org/datafusion-python/)
- [Architecture](https://docs.rs/datafusion/latest/datafusion/index.html#architecture)

See the Project Website at https://arrow.apache.org/datafusion/ for more details.
## Building your project with DataFusion

## Getting Started
DataFusion is great for building projects and products like SQL interfaces, time series platforms, and domain-specific query engines. [Click Here](https://arrow.apache.org/datafusion/user-guide/introduction.html#known-users) to see a list of known users.

Please see the [developer’s guide](https://arrow.apache.org/datafusion/contributor-guide/index.html#developer-s-guide) for more information on how to get started.
## Contributing to DataFusion

## Examples
The [developer’s guide] contains information on how to contribute.

Please see the [example usage](https://arrow.apache.org/datafusion/user-guide/example-usage.html) in the user guide and the [datafusion-examples](https://github.com/apache/arrow-datafusion/tree/master/datafusion-examples) crate for more information on how to use DataFusion.
[developer’s guide]: https://arrow.apache.org/datafusion/contributor-guide/index.html#developer-s-guide
9 changes: 5 additions & 4 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
[package]
name = "datafusion-benchmarks"
description = "DataFusion Benchmarks"
version = "24.0.0"
version = "28.0.0"
edition = "2021"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
license = "Apache-2.0"
rust-version = "1.62"
rust-version = "1.70"

[features]
ci = []
Expand All @@ -34,9 +34,10 @@ snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = { workspace = true }
datafusion = { path = "../datafusion/core", version = "24.0.0" }
datafusion = { path = "../datafusion/core", version = "28.0.0" }
env_logger = "0.10"
futures = "0.3"
log = "^0.4"
mimalloc = { version = "0.1", optional = true, default-features = false }
num_cpus = "1.13.0"
parquet = { workspace = true }
Expand All @@ -48,4 +49,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" }
tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] }

[dev-dependencies]
datafusion-proto = { path = "../datafusion/proto", version = "24.0.0" }
datafusion-proto = { path = "../datafusion/proto", version = "28.0.0" }
Loading

0 comments on commit 579cbd2

Please sign in to comment.