From cf492055f72635cce83ee24e88cc78f7a30e296d Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 13 Jan 2025 11:56:52 -0800 Subject: [PATCH] feat: upgrade datafusion to 44.0 (#3341) I wanted to upgrade to 44 to check and see if the string sorting bug was fixed. It wasn't. However, it might still be useful to commit this since we'll need to upgrade at some point. --- Cargo.lock | 586 ++++++++----------- Cargo.toml | 18 +- python/Cargo.lock | 426 ++++++-------- rust/lance-datafusion/Cargo.toml | 4 +- rust/lance-datafusion/src/exec.rs | 36 +- rust/lance-datafusion/src/planner.rs | 7 +- rust/lance-datafusion/src/sql.rs | 3 +- rust/lance-datafusion/src/substrait.rs | 14 +- rust/lance-index/src/scalar/btree.rs | 7 +- rust/lance/src/datafusion/dataframe.rs | 9 + rust/lance/src/dataset/scanner.rs | 69 ++- rust/lance/src/dataset/write/merge_insert.rs | 67 ++- rust/lance/src/io/exec/fts.rs | 11 +- rust/lance/src/io/exec/knn.rs | 13 +- rust/lance/src/io/exec/optimizer.rs | 2 + rust/lance/src/io/exec/pushdown_scan.rs | 6 +- rust/lance/src/io/exec/scalar_index.rs | 14 +- rust/lance/src/io/exec/scan.rs | 4 +- rust/lance/src/io/exec/testing.rs | 7 +- 19 files changed, 629 insertions(+), 674 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c428391884..030211518e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -429,24 +429,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-compression" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" -dependencies = [ - "bzip2", - "flate2", - "futures-core", - "futures-io", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", -] - [[package]] name = "async-executor" version = "1.13.1" @@ -522,7 +504,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -565,7 +547,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -964,6 +946,19 @@ dependencies = [ "vsimd", ] +[[package]] +name = "bigdecimal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bincode" version = "1.3.3" @@ -1021,28 +1016,6 @@ dependencies = [ "wyz", ] -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -1141,27 +1114,6 @@ dependencies = [ "either", ] -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "cast" version = "0.3.0" @@ -1304,7 +1256,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -1378,12 +1330,6 @@ dependencies = [ "tiny-keccak", ] -[[package]] -name = "constant_time_eq" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" - [[package]] name = "convert_case" version = "0.6.0" @@ -1609,7 +1555,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -1620,7 +1566,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -1658,19 +1604,16 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" +checksum = "014fc8c384ecacedaabb3bc8359c2a6c6e9d8f7bea65be3434eccacfc37f52d9" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", - "bzip2", "chrono", "dashmap 6.1.0", "datafusion-catalog", @@ -1681,6 +1624,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", @@ -1688,36 +1632,27 @@ dependencies = [ "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", - "flate2", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", "itertools 0.13.0", "log", - "num_cpus", "object_store 0.11.1", "parking_lot", "parquet", - "paste", - "pin-project-lite", "rand", + "regex", "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", - "xz2", - "zstd", ] [[package]] name = "datafusion-catalog" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" +checksum = "ee60d33e210ef96070377ae667ece7caa0e959c8387496773d4a1a72f1a5012e" dependencies = [ "arrow-schema", "async-trait", @@ -1730,51 +1665,55 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" +checksum = "0b42b7d720fe21ed9cca2ebb635f3f13a12cfab786b41e0fba184fb2e620525b" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", "arrow-schema", - "chrono", "half", "hashbrown 0.14.5", - "instant", + "indexmap", "libc", - "num_cpus", + "log", "object_store 0.11.1", "parquet", "paste", "sqlparser", "tokio", + "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694c9d7ea1b82f95768215c4cb5c2d5c613690624e832a7ee64be563139d582f" +checksum = "72fbf14d4079f7ce5306393084fe5057dddfdc2113577e0049310afa12e94281" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c278dbd64860ed0bb5240fc1f4cb6aeea437153910aea69bcf7d5a8d6d0454f3" + [[package]] name = "datafusion-execution" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b4cedcd98151e0a297f34021b6b232ff0ebc0f2f18ea5e7446b5ebda99b1a1" +checksum = "e22cb02af47e756468b3cbfee7a83e3d4f2278d452deb4b033ba933c75169486" dependencies = [ "arrow", - "chrono", "dashmap 6.1.0", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store 0.11.1", "parking_lot", @@ -1785,104 +1724,101 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" +checksum = "62298eadb1d15b525df1315e61a71519ffc563d41d5c3b2a30fda2d70f77b93c" dependencies = [ - "ahash", "arrow", - "arrow-array", - "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap", "paste", "serde_json", "sqlparser", - "strum", - "strum_macros", ] [[package]] name = "datafusion-expr-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1ba2bb018218d9260bbd7de6a46a20f61b93d4911dba8aa07735625004c4fb" +checksum = "dda7f73c5fc349251cd3dcb05773c5bf55d2505a698ef9d38dfc712161ea2f55" dependencies = [ "arrow", "datafusion-common", - "paste", + "itertools 0.13.0", ] [[package]] name = "datafusion-functions" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" +checksum = "fd197f3b2975424d3a4898ea46651be855a46721a56727515dbd5c9e2fb597da" dependencies = [ "arrow", "arrow-buffer", "base64 0.22.1", - "blake2", - "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hashbrown 0.14.5", "hex", "itertools 0.13.0", "log", - "md-5", "rand", "regex", - "sha2", "unicode-segmentation", "uuid", ] [[package]] name = "datafusion-functions-aggregate" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" +checksum = "aabbe48fba18f9981b134124381bee9e46f93518b8ad2f9721ee296cef5affb9" dependencies = [ "ahash", "arrow", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "half", "log", "paste", - "sqlparser", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2285d080dfecdfb8605b0ab2f1a41e2473208dc8e9bd6f5d1dbcfe97f517e6f" +checksum = "d7a3fefed9c8c11268d446d924baca8cabf52fe32f73fdaa20854bac6473590c" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", ] [[package]] name = "datafusion-functions-nested" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" +checksum = "6360f27464fab857bec698af39b2ae331dc07c8bf008fb4de387a19cdc6815a5" dependencies = [ "arrow", "arrow-array", @@ -1898,106 +1834,139 @@ dependencies = [ "itertools 0.13.0", "log", "paste", - "rand", +] + +[[package]] +name = "datafusion-functions-table" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c35c070eb705c12795dab399c3809f4dfbc290678c624d3989490ca9b8449c1" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", ] [[package]] name = "datafusion-functions-window" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e78d30ebd6e9f74d4aeddec32744f5a18b5f9584591bc586fb5259c4848bac5" +checksum = "52229bca26b590b140900752226c829f15fc1a99840e1ca3ce1a9534690b82a8" dependencies = [ "datafusion-common", + "datafusion-doc", "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "367befc303b64a668a10ae6988a064a9289e1999e71a7f8e526b6e14d6bdd9d6" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c" +dependencies = [ + "quote", + "syn 2.0.96", ] [[package]] name = "datafusion-optimizer" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be172c44bf344df707e0c041fa3f41e6dc5fb0976f539c68bc442bca150ee58c" +checksum = "53b520413906f755910422b016fb73884ae6e9e1b376de4f9584b6c0e031da75" dependencies = [ "arrow", - "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", "itertools 0.13.0", "log", - "paste", + "regex", "regex-syntax 0.8.5", ] [[package]] name = "datafusion-physical-expr" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" +checksum = "acd6ddc378f6ad19af95ccd6790dec8f8e1264bc4c70e99ddc1830c1a1c78ccd" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "base64 0.22.1", - "chrono", "datafusion-common", - "datafusion-execution", "datafusion-expr", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "hex", "indexmap", "itertools 0.13.0", "log", "paste", "petgraph", - "regex", ] [[package]] name = "datafusion-physical-expr-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "242ba8a26351d9ca16295814c46743b0d1b00ec372174bdfbba991d0953dd596" +checksum = "06e6c05458eccd74b4c77ed6a1fe63d52434240711de7f6960034794dad1caf5" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-optimizer" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" +checksum = "9dc3a82190f49c37d377f31317e07ab5d7588b837adadba8ac367baad5dc2351" dependencies = [ - "arrow-schema", + "arrow", "datafusion-common", "datafusion-execution", + "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools 0.13.0", + "log", ] [[package]] name = "datafusion-physical-plan" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" +checksum = "6a6608bc9844b4ddb5ed4e687d173e6c88700b1d0482f43894617d18a1fe75da" dependencies = [ "ahash", "arrow", @@ -2011,8 +1980,7 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", @@ -2021,45 +1989,45 @@ dependencies = [ "indexmap", "itertools 0.13.0", "log", - "once_cell", "parking_lot", "pin-project-lite", - "rand", "tokio", ] [[package]] name = "datafusion-sql" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" +checksum = "6a884061c79b33d0c8e84a6f4f4be8bdc12c0f53f5af28ddf5d6d95ac0b15fdc" dependencies = [ "arrow", "arrow-array", "arrow-schema", + "bigdecimal", "datafusion-common", "datafusion-expr", + "indexmap", "log", "regex", "sqlparser", - "strum", ] [[package]] name = "datafusion-substrait" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "220d7ab0ffadd8b1af753904b18dd92d270271810b1ce9f8be3c3dbe2392b636" +checksum = "d2ec36dd38512b1ecc7a3bb92e72046b944611b2f0d709445c1e51b0143bffd4" dependencies = [ "arrow-buffer", "async-recursion", + "async-trait", "chrono", "datafusion", "itertools 0.13.0", "object_store 0.11.1", "pbjson-types", - "prost 0.13.3", - "substrait 0.41.9", + "prost 0.13.4", + "substrait 0.50.4", "url", ] @@ -2120,7 +2088,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -2130,7 +2098,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -2179,7 +2147,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -2345,7 +2313,7 @@ checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -2603,7 +2571,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -3158,7 +3126,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -3208,7 +3176,7 @@ dependencies = [ "libflate", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -3470,8 +3438,8 @@ dependencies = [ "pprof", "pretty_assertions", "prost 0.12.6", - "prost 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-types 0.13.4", "rand", "random_word", "roaring", @@ -3533,7 +3501,7 @@ dependencies = [ "object_store 0.10.2", "pin-project", "proptest", - "prost 0.13.3", + "prost 0.13.4", "rand", "roaring", "serde_json", @@ -3568,7 +3536,7 @@ dependencies = [ "lance-datagen", "lazy_static", "log", - "prost 0.13.3", + "prost 0.13.4", "snafu 0.7.5", "substrait-expr", "tokio", @@ -3622,9 +3590,9 @@ dependencies = [ "num-traits", "paste", "pprof", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", "protobuf-src", "rand", "rand_xoshiro", @@ -3661,9 +3629,9 @@ dependencies = [ "lance-io", "log", "pprof", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", "protobuf-src", "rand", "snafu 0.7.5", @@ -3701,9 +3669,9 @@ dependencies = [ "pprof", "pretty_assertions", "proptest", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", "protobuf-src", "rand", "roaring", @@ -3760,8 +3728,8 @@ dependencies = [ "num-traits", "object_store 0.10.2", "pprof", - "prost 0.13.3", - "prost-build 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", "protobuf-src", "rand", "random_word", @@ -3811,7 +3779,7 @@ dependencies = [ "path_abs", "pin-project", "pprof", - "prost 0.13.3", + "prost 0.13.4", "rand", "shellexpand", "snafu 0.7.5", @@ -3902,9 +3870,9 @@ dependencies = [ "pprof", "pretty_assertions", "proptest", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", "protobuf-src", "rand", "rangemap", @@ -3924,7 +3892,7 @@ version = "0.22.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -4316,7 +4284,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -4614,7 +4582,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -4781,8 +4749,8 @@ checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ "heck 0.5.0", "itertools 0.13.0", - "prost 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-types 0.13.4", ] [[package]] @@ -4795,8 +4763,8 @@ dependencies = [ "chrono", "pbjson", "pbjson-build", - "prost 0.13.3", - "prost-build 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", "serde", ] @@ -4877,7 +4845,7 @@ checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -5033,7 +5001,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -5086,12 +5054,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ "bytes", - "prost-derive 0.13.3", + "prost-derive 0.13.4", ] [[package]] @@ -5111,17 +5079,16 @@ dependencies = [ "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.90", + "syn 2.0.96", "tempfile", ] [[package]] name = "prost-build" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" dependencies = [ - "bytes", "heck 0.5.0", "itertools 0.13.0", "log", @@ -5129,10 +5096,10 @@ dependencies = [ "once_cell", "petgraph", "prettyplease", - "prost 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-types 0.13.4", "regex", - "syn 2.0.90", + "syn 2.0.96", "tempfile", ] @@ -5146,20 +5113,20 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] name = "prost-derive" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -5173,11 +5140,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" dependencies = [ - "prost 0.13.3", + "prost 0.13.4", ] [[package]] @@ -5473,16 +5440,6 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" -[[package]] -name = "regress" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" -dependencies = [ - "hashbrown 0.14.5", - "memchr", -] - [[package]] name = "regress" version = "0.10.1" @@ -5615,7 +5572,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.90", + "syn 2.0.96", "unicode-ident", ] @@ -5831,7 +5788,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -5888,9 +5845,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" dependencies = [ "serde", ] @@ -5903,22 +5860,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -5929,14 +5886,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.135" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" dependencies = [ "itoa", "memchr", @@ -5953,7 +5910,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6095,7 +6052,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6122,9 +6079,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.50.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" dependencies = [ "log", "sqlparser_derive", @@ -6132,13 +6089,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6196,64 +6153,65 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] name = "substrait" -version = "0.41.9" +version = "0.49.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" +checksum = "2c271a596176d3b82bfc5b4107fe9fbd30e6a9a99c0dca146777f05d8f0e08e4" dependencies = [ "heck 0.5.0", - "pbjson", - "pbjson-build", - "pbjson-types", "prettyplease", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", + "regress", "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.90", - "typify 0.1.0", + "syn 2.0.96", + "typify", "walkdir", ] [[package]] name = "substrait" -version = "0.49.5" +version = "0.50.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c271a596176d3b82bfc5b4107fe9fbd30e6a9a99c0dca146777f05d8f0e08e4" +checksum = "b1772d041c37cc7e6477733c76b2acf4ee36bd52b2ae4d9ea0ec9c87d003db32" dependencies = [ "heck 0.5.0", + "pbjson", + "pbjson-build", + "pbjson-types", "prettyplease", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", - "regress 0.10.1", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", + "regress", "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.90", - "typify 0.2.0", + "syn 2.0.96", + "typify", "walkdir", ] [[package]] name = "substrait-expr" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45a6a94f5dd69c5329a9c96c93ac5f17a8d64089ca21d29d7971825f7451941d" +checksum = "9d091cf06bc7808bd81eb01f5f5b77b2b14288bb022501a2dcad78633c65262f" dependencies = [ "once_cell", - "prost 0.13.3", - "substrait 0.49.5", + "prost 0.13.4", + "substrait 0.50.4", "substrait-expr-funcgen", "substrait-expr-macros", "thiserror 2.0.4", @@ -6271,7 +6229,7 @@ dependencies = [ "quote", "serde_yaml", "substrait 0.49.5", - "syn 2.0.90", + "syn 2.0.96", "thiserror 2.0.4", ] @@ -6283,7 +6241,7 @@ checksum = "3a2be2af0276c9d693f90d0f4e0e7b1790b14692538e0d418812249f41c055be" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6328,9 +6286,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.90" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -6354,7 +6312,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6589,7 +6547,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6647,7 +6605,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6658,7 +6616,7 @@ checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6782,7 +6740,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6882,7 +6840,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -6963,44 +6921,14 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "typify" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" -dependencies = [ - "typify-impl 0.1.0", - "typify-macro 0.1.0", -] - [[package]] name = "typify" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" dependencies = [ - "typify-impl 0.2.0", - "typify-macro 0.2.0", -] - -[[package]] -name = "typify-impl" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bbb24e990654aff858d80fee8114f4322f7d7a1b1ecb45129e2fcb0d0ad5ae" -dependencies = [ - "heck 0.5.0", - "log", - "proc-macro2", - "quote", - "regress 0.9.1", - "schemars", - "semver", - "serde", - "serde_json", - "syn 2.0.90", - "thiserror 1.0.69", - "unicode-ident", + "typify-impl", + "typify-macro", ] [[package]] @@ -7013,33 +6941,16 @@ dependencies = [ "log", "proc-macro2", "quote", - "regress 0.10.1", + "regress", "schemars", "semver", "serde", "serde_json", - "syn 2.0.90", + "syn 2.0.96", "thiserror 1.0.69", "unicode-ident", ] -[[package]] -name = "typify-macro" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" -dependencies = [ - "proc-macro2", - "quote", - "schemars", - "semver", - "serde", - "serde_json", - "serde_tokenstream", - "syn 2.0.90", - "typify-impl 0.1.0", -] - [[package]] name = "typify-macro" version = "0.2.0" @@ -7053,8 +6964,8 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.90", - "typify-impl 0.2.0", + "syn 2.0.96", + "typify-impl", ] [[package]] @@ -7267,7 +7178,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", "wasm-bindgen-shared", ] @@ -7302,7 +7213,7 @@ checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -7686,15 +7597,6 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yada" version = "0.5.1" @@ -7727,7 +7629,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", "synstructure", ] @@ -7749,7 +7651,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] @@ -7769,7 +7671,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", "synstructure", ] @@ -7798,7 +7700,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.96", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index ccd905ae0e..e70b655c1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,20 +95,18 @@ criterion = { version = "0.5", features = [ "html_reports", ] } crossbeam-queue = "0.3" -datafusion = { version = "42.0", default-features = false, features = [ +datafusion = { version = "44.0", default-features = false, features = [ "nested_expressions", "regex_expressions", "unicode_expressions", ] } -datafusion-common = "42.0" -datafusion-functions = { version = "42.0", features = ["regex_expressions"] } -datafusion-sql = "42.0" -datafusion-expr = "42.0" -datafusion-execution = "42.0" -datafusion-optimizer = "42.0" -datafusion-physical-expr = { version = "42.0", features = [ - "regex_expressions", -] } +datafusion-common = "44.0" +datafusion-functions = { version = "44.0", features = ["regex_expressions"] } +datafusion-sql = "44.0" +datafusion-expr = "44.0" +datafusion-execution = "44.0" +datafusion-optimizer = "44.0" +datafusion-physical-expr = { version = "44.0" } deepsize = "0.2.0" dirs = "5.0.0" either = "1.0" diff --git a/python/Cargo.lock b/python/Cargo.lock index 238c9f3e39..b8bd018c6e 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -100,12 +100,6 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" -[[package]] -name = "arrayvec" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" - [[package]] name = "arrow" version = "53.3.0" @@ -351,24 +345,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-compression" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" -dependencies = [ - "bzip2", - "flate2", - "futures-core", - "futures-io", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", -] - [[package]] name = "async-executor" version = "1.13.1" @@ -886,6 +862,19 @@ dependencies = [ "vsimd", ] +[[package]] +name = "bigdecimal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bincode" version = "1.3.3" @@ -928,28 +917,6 @@ dependencies = [ "wyz", ] -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -1027,27 +994,6 @@ dependencies = [ "either", ] -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "cc" version = "1.2.2" @@ -1162,12 +1108,6 @@ dependencies = [ "tiny-keccak", ] -[[package]] -name = "constant_time_eq" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" - [[package]] name = "core-foundation" version = "0.9.4" @@ -1386,19 +1326,16 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" +checksum = "014fc8c384ecacedaabb3bc8359c2a6c6e9d8f7bea65be3434eccacfc37f52d9" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", - "bzip2", "chrono", "dashmap 6.1.0", "datafusion-catalog", @@ -1409,6 +1346,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", @@ -1416,36 +1354,27 @@ dependencies = [ "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", - "flate2", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", "itertools 0.13.0", "log", - "num_cpus", "object_store 0.11.1", "parking_lot", "parquet", - "paste", - "pin-project-lite", "rand", + "regex", "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", - "xz2", - "zstd", ] [[package]] name = "datafusion-catalog" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" +checksum = "ee60d33e210ef96070377ae667ece7caa0e959c8387496773d4a1a72f1a5012e" dependencies = [ "arrow-schema", "async-trait", @@ -1458,51 +1387,55 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" +checksum = "0b42b7d720fe21ed9cca2ebb635f3f13a12cfab786b41e0fba184fb2e620525b" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", "arrow-schema", - "chrono", "half", "hashbrown 0.14.5", - "instant", + "indexmap", "libc", - "num_cpus", + "log", "object_store 0.11.1", "parquet", "paste", "sqlparser", "tokio", + "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694c9d7ea1b82f95768215c4cb5c2d5c613690624e832a7ee64be563139d582f" +checksum = "72fbf14d4079f7ce5306393084fe5057dddfdc2113577e0049310afa12e94281" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c278dbd64860ed0bb5240fc1f4cb6aeea437153910aea69bcf7d5a8d6d0454f3" + [[package]] name = "datafusion-execution" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b4cedcd98151e0a297f34021b6b232ff0ebc0f2f18ea5e7446b5ebda99b1a1" +checksum = "e22cb02af47e756468b3cbfee7a83e3d4f2278d452deb4b033ba933c75169486" dependencies = [ "arrow", - "chrono", "dashmap 6.1.0", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store 0.11.1", "parking_lot", @@ -1513,104 +1446,101 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" +checksum = "62298eadb1d15b525df1315e61a71519ffc563d41d5c3b2a30fda2d70f77b93c" dependencies = [ - "ahash", "arrow", - "arrow-array", - "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap", "paste", "serde_json", "sqlparser", - "strum", - "strum_macros", ] [[package]] name = "datafusion-expr-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1ba2bb018218d9260bbd7de6a46a20f61b93d4911dba8aa07735625004c4fb" +checksum = "dda7f73c5fc349251cd3dcb05773c5bf55d2505a698ef9d38dfc712161ea2f55" dependencies = [ "arrow", "datafusion-common", - "paste", + "itertools 0.13.0", ] [[package]] name = "datafusion-functions" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" +checksum = "fd197f3b2975424d3a4898ea46651be855a46721a56727515dbd5c9e2fb597da" dependencies = [ "arrow", "arrow-buffer", "base64 0.22.1", - "blake2", - "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hashbrown 0.14.5", "hex", "itertools 0.13.0", "log", - "md-5", "rand", "regex", - "sha2", "unicode-segmentation", "uuid", ] [[package]] name = "datafusion-functions-aggregate" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" +checksum = "aabbe48fba18f9981b134124381bee9e46f93518b8ad2f9721ee296cef5affb9" dependencies = [ "ahash", "arrow", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "half", "log", "paste", - "sqlparser", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2285d080dfecdfb8605b0ab2f1a41e2473208dc8e9bd6f5d1dbcfe97f517e6f" +checksum = "d7a3fefed9c8c11268d446d924baca8cabf52fe32f73fdaa20854bac6473590c" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", ] [[package]] name = "datafusion-functions-nested" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" +checksum = "6360f27464fab857bec698af39b2ae331dc07c8bf008fb4de387a19cdc6815a5" dependencies = [ "arrow", "arrow-array", @@ -1626,106 +1556,139 @@ dependencies = [ "itertools 0.13.0", "log", "paste", - "rand", +] + +[[package]] +name = "datafusion-functions-table" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c35c070eb705c12795dab399c3809f4dfbc290678c624d3989490ca9b8449c1" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", ] [[package]] name = "datafusion-functions-window" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e78d30ebd6e9f74d4aeddec32744f5a18b5f9584591bc586fb5259c4848bac5" +checksum = "52229bca26b590b140900752226c829f15fc1a99840e1ca3ce1a9534690b82a8" dependencies = [ "datafusion-common", + "datafusion-doc", "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "367befc303b64a668a10ae6988a064a9289e1999e71a7f8e526b6e14d6bdd9d6" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c" +dependencies = [ + "quote", + "syn 2.0.90", ] [[package]] name = "datafusion-optimizer" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be172c44bf344df707e0c041fa3f41e6dc5fb0976f539c68bc442bca150ee58c" +checksum = "53b520413906f755910422b016fb73884ae6e9e1b376de4f9584b6c0e031da75" dependencies = [ "arrow", - "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", "itertools 0.13.0", "log", - "paste", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" +checksum = "acd6ddc378f6ad19af95ccd6790dec8f8e1264bc4c70e99ddc1830c1a1c78ccd" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "base64 0.22.1", - "chrono", "datafusion-common", - "datafusion-execution", "datafusion-expr", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "hex", "indexmap", "itertools 0.13.0", "log", "paste", "petgraph", - "regex", ] [[package]] name = "datafusion-physical-expr-common" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "242ba8a26351d9ca16295814c46743b0d1b00ec372174bdfbba991d0953dd596" +checksum = "06e6c05458eccd74b4c77ed6a1fe63d52434240711de7f6960034794dad1caf5" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-optimizer" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" +checksum = "9dc3a82190f49c37d377f31317e07ab5d7588b837adadba8ac367baad5dc2351" dependencies = [ - "arrow-schema", + "arrow", "datafusion-common", "datafusion-execution", + "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools 0.13.0", + "log", ] [[package]] name = "datafusion-physical-plan" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" +checksum = "6a6608bc9844b4ddb5ed4e687d173e6c88700b1d0482f43894617d18a1fe75da" dependencies = [ "ahash", "arrow", @@ -1739,8 +1702,7 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", @@ -1749,44 +1711,44 @@ dependencies = [ "indexmap", "itertools 0.13.0", "log", - "once_cell", "parking_lot", "pin-project-lite", - "rand", "tokio", ] [[package]] name = "datafusion-sql" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" +checksum = "6a884061c79b33d0c8e84a6f4f4be8bdc12c0f53f5af28ddf5d6d95ac0b15fdc" dependencies = [ "arrow", "arrow-array", "arrow-schema", + "bigdecimal", "datafusion-common", "datafusion-expr", + "indexmap", "log", "regex", "sqlparser", - "strum", ] [[package]] name = "datafusion-substrait" -version = "42.2.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "220d7ab0ffadd8b1af753904b18dd92d270271810b1ce9f8be3c3dbe2392b636" +checksum = "d2ec36dd38512b1ecc7a3bb92e72046b944611b2f0d709445c1e51b0143bffd4" dependencies = [ "arrow-buffer", "async-recursion", + "async-trait", "chrono", "datafusion", "itertools 0.13.0", "object_store 0.11.1", "pbjson-types", - "prost 0.13.3", + "prost 0.13.4", "substrait", "url", ] @@ -3062,8 +3024,8 @@ dependencies = [ "permutation", "pin-project", "prost 0.12.6", - "prost 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-types 0.13.4", "rand", "roaring", "serde", @@ -3119,7 +3081,7 @@ dependencies = [ "num_cpus", "object_store 0.10.2", "pin-project", - "prost 0.13.3", + "prost 0.13.4", "rand", "roaring", "serde_json", @@ -3152,7 +3114,7 @@ dependencies = [ "lance-core", "lazy_static", "log", - "prost 0.13.3", + "prost 0.13.4", "snafu 0.7.5", "tokio", ] @@ -3199,9 +3161,9 @@ dependencies = [ "log", "num-traits", "paste", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", "rand", "seq-macro", "snafu 0.7.5", @@ -3234,9 +3196,9 @@ dependencies = [ "log", "num-traits", "object_store 0.10.2", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", "roaring", "snafu 0.7.5", "tempfile", @@ -3284,8 +3246,8 @@ dependencies = [ "moka", "num-traits", "object_store 0.10.2", - "prost 0.13.3", - "prost-build 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", "rand", "rayon", "roaring", @@ -3328,7 +3290,7 @@ dependencies = [ "object_store 0.10.2", "path_abs", "pin-project", - "prost 0.13.3", + "prost 0.13.4", "rand", "shellexpand", "snafu 0.7.5", @@ -3384,9 +3346,9 @@ dependencies = [ "lazy_static", "log", "object_store 0.10.2", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", "rand", "rangemap", "roaring", @@ -3632,17 +3594,6 @@ dependencies = [ "twox-hash", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "match_cfg" version = "0.1.0" @@ -4169,8 +4120,8 @@ checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ "heck 0.5.0", "itertools 0.13.0", - "prost 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-types 0.13.4", ] [[package]] @@ -4183,8 +4134,8 @@ dependencies = [ "chrono", "pbjson", "pbjson-build", - "prost 0.13.3", - "prost-build 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", "serde", ] @@ -4384,12 +4335,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ "bytes", - "prost-derive 0.13.3", + "prost-derive 0.13.4", ] [[package]] @@ -4422,7 +4373,7 @@ checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", "heck 0.5.0", - "itertools 0.10.5", + "itertools 0.12.1", "log", "multimap", "once_cell", @@ -4437,20 +4388,19 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" dependencies = [ - "bytes", "heck 0.5.0", - "itertools 0.10.5", + "itertools 0.13.0", "log", "multimap", "once_cell", "petgraph", "prettyplease 0.2.25", - "prost 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-types 0.13.4", "regex", "syn 2.0.90", "tempfile", @@ -4476,7 +4426,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.12.1", "proc-macro2", "quote", "syn 2.0.90", @@ -4484,12 +4434,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.90", @@ -4515,11 +4465,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" dependencies = [ - "prost 0.13.3", + "prost 0.13.4", ] [[package]] @@ -4550,7 +4500,7 @@ dependencies = [ "lazy_static", "log", "object_store 0.10.2", - "prost 0.13.3", + "prost 0.13.4", "prost-build 0.11.9", "pyo3", "serde", @@ -4861,11 +4811,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.9.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" +checksum = "4f56e622c2378013c6c61e2bd776604c46dc1087b2dc5293275a0c20a44f0771" dependencies = [ - "hashbrown 0.14.5", + "hashbrown 0.15.2", "memchr", ] @@ -5207,9 +5157,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" dependencies = [ "serde", ] @@ -5222,18 +5172,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", @@ -5441,9 +5391,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.50.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" dependencies = [ "log", "sqlparser_derive", @@ -5451,9 +5401,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", @@ -5514,18 +5464,19 @@ dependencies = [ [[package]] name = "substrait" -version = "0.41.9" +version = "0.50.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" +checksum = "ec739d0e67c4d681142069ef11de5f5325b4ece5ebd05586ed17fc8443581ae2" dependencies = [ "heck 0.5.0", "pbjson", "pbjson-build", "pbjson-types", "prettyplease 0.2.25", - "prost 0.13.3", - "prost-build 0.13.3", - "prost-types 0.13.3", + "prost 0.13.4", + "prost-build 0.13.4", + "prost-types 0.13.4", + "regress", "schemars", "semver", "serde", @@ -6139,9 +6090,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" +checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" dependencies = [ "typify-impl", "typify-macro", @@ -6149,9 +6100,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bbb24e990654aff858d80fee8114f4322f7d7a1b1ecb45129e2fcb0d0ad5ae" +checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" dependencies = [ "heck 0.5.0", "log", @@ -6169,9 +6120,9 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" +checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" dependencies = [ "proc-macro2", "quote", @@ -6729,15 +6680,6 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yada" version = "0.5.1" diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 03e392bcd1..47835ebc4d 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -21,7 +21,7 @@ datafusion.workspace = true datafusion-common.workspace = true datafusion-functions.workspace = true datafusion-physical-expr.workspace = true -datafusion-substrait = { version = "42.0", optional = true } +datafusion-substrait = { version = "44.0", optional = true } futures.workspace = true lance-arrow.workspace = true lance-core = { workspace = true, features = ["datafusion"] } @@ -32,7 +32,7 @@ snafu.workspace = true tokio.workspace = true [dev-dependencies] -substrait-expr = { version = "0.2.2" } +substrait-expr = { version = "0.2.3" } lance-datagen.workspace = true [features] diff --git a/rust/lance-datafusion/src/exec.rs b/rust/lance-datafusion/src/exec.rs index c3f64e1bec..ac6b633c88 100644 --- a/rust/lance-datafusion/src/exec.rs +++ b/rust/lance-datafusion/src/exec.rs @@ -14,13 +14,15 @@ use datafusion::{ context::{SessionConfig, SessionContext}, disk_manager::DiskManagerConfig, memory_pool::FairSpillPool, - runtime_env::{RuntimeConfig, RuntimeEnv}, + runtime_env::RuntimeEnvBuilder, TaskContext, }, physical_plan::{ - display::DisplayableExecutionPlan, stream::RecordBatchStreamAdapter, - streaming::PartitionStream, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, - SendableRecordBatchStream, + display::DisplayableExecutionPlan, + execution_plan::{Boundedness, EmissionType}, + stream::RecordBatchStreamAdapter, + streaming::PartitionStream, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, }, }; use datafusion_common::{DataFusionError, Statistics}; @@ -57,7 +59,8 @@ impl OneShotExec { properties: PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::RoundRobinBatch(1), - datafusion::physical_plan::ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ), } } @@ -199,14 +202,15 @@ impl LanceExecutionOptions { pub fn new_session_context(options: LanceExecutionOptions) -> SessionContext { let session_config = SessionConfig::new(); - let mut runtime_config = RuntimeConfig::new(); + let mut runtime_env_builder = RuntimeEnvBuilder::new(); if options.use_spilling() { - runtime_config.disk_manager = DiskManagerConfig::NewOs; - runtime_config.memory_pool = Some(Arc::new(FairSpillPool::new( - options.mem_pool_size() as usize - ))); + runtime_env_builder = runtime_env_builder + .with_disk_manager(DiskManagerConfig::new()) + .with_memory_pool(Arc::new(FairSpillPool::new( + options.mem_pool_size() as usize + ))); } - let runtime_env = Arc::new(RuntimeEnv::new(runtime_config).unwrap()); + let runtime_env = runtime_env_builder.build_arc().unwrap(); SessionContext::new_with_config_rt(session_config, runtime_env) } @@ -270,6 +274,16 @@ struct OneShotPartitionStream { schema: Arc, } +impl std::fmt::Debug for OneShotPartitionStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let data = self.data.lock().unwrap(); + f.debug_struct("OneShotPartitionStream") + .field("exhausted", &data.is_none()) + .field("schema", self.schema.as_ref()) + .finish() + } +} + impl OneShotPartitionStream { fn new(data: SendableRecordBatchStream) -> Self { let schema = data.schema(); diff --git a/rust/lance-datafusion/src/planner.rs b/rust/lance-datafusion/src/planner.rs index aa596d05c7..d9d9b44e0d 100644 --- a/rust/lance-datafusion/src/planner.rs +++ b/rust/lance-datafusion/src/planner.rs @@ -23,7 +23,7 @@ use datafusion::config::ConfigOptions; use datafusion::error::Result as DFResult; use datafusion::execution::config::SessionConfig; use datafusion::execution::context::SessionState; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::logical_expr::expr::ScalarFunction; use datafusion::logical_expr::planner::{ExprPlanner, PlannerResult, RawFieldAccessExpr}; @@ -162,8 +162,7 @@ struct LanceContextProvider { impl Default for LanceContextProvider { fn default() -> Self { let config = SessionConfig::new(); - let runtime_config = RuntimeConfig::new(); - let runtime = Arc::new(RuntimeEnv::new(runtime_config).unwrap()); + let runtime = RuntimeEnvBuilder::new().build_arc().unwrap(); let mut state_builder = SessionStateBuilder::new() .with_config(config) .with_runtime_env(runtime) @@ -660,6 +659,7 @@ impl Planner { expr, pattern, escape_char, + any: _, } => Ok(Expr::Like(Like::new( *negated, Box::new(self.parse_sql_expr(expr)?), @@ -672,6 +672,7 @@ impl Planner { expr, pattern, escape_char, + any: _, } => Ok(Expr::Like(Like::new( *negated, Box::new(self.parse_sql_expr(expr)?), diff --git a/rust/lance-datafusion/src/sql.rs b/rust/lance-datafusion/src/sql.rs index 88b4415eda..8d74ffc8d7 100644 --- a/rust/lance-datafusion/src/sql.rs +++ b/rust/lance-datafusion/src/sql.rs @@ -129,7 +129,8 @@ mod tests { negated: false, expr: Box::new(Expr::Identifier(Ident::new("a"))), pattern: Box::new(Expr::Value(Value::SingleQuotedString("abc%".to_string()))), - escape_char: None + escape_char: None, + any: false, }, expr ); diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index 6f835a85f5..92ee4edc8d 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -50,8 +50,10 @@ pub fn encode_substrait(expr: Expr, schema: Arc) -> Result> let session_context = SessionContext::new(); - let substrait_plan = - datafusion_substrait::logical_plan::producer::to_substrait_plan(&plan, &session_context)?; + let substrait_plan = datafusion_substrait::logical_plan::producer::to_substrait_plan( + &plan, + &session_context.state(), + )?; if let Some(plan_rel::RelType::Root(root)) = &substrait_plan.relations[0].rel_type { if let Some(rel::RelType::Filter(filt)) = &root.input.as_ref().unwrap().rel_type { @@ -359,9 +361,11 @@ pub async fn parse_substrait(expr: &[u8], input_schema: Arc) -> Res }, dummy_table, )?; - let df_plan = - datafusion_substrait::logical_plan::consumer::from_substrait_plan(&session_context, &plan) - .await?; + let df_plan = datafusion_substrait::logical_plan::consumer::from_substrait_plan( + &session_context.state(), + &plan, + ) + .await?; let expr = df_plan.expressions().pop().unwrap(); diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs index 2624c81691..b2beba0785 100644 --- a/rust/lance-index/src/scalar/btree.rs +++ b/rust/lance-index/src/scalar/btree.rs @@ -22,7 +22,7 @@ use datafusion::{ }; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::Accumulator; -use datafusion_physical_expr::{expressions::Column, PhysicalSortExpr}; +use datafusion_physical_expr::{expressions::Column, LexOrdering, PhysicalSortExpr}; use deepsize::{Context, DeepSizeOf}; use futures::{ future::BoxFuture, @@ -1281,7 +1281,10 @@ impl TrainingSource for BTreeUpdater { // The UnionExec creates multiple partitions but the SortPreservingMergeExec merges // them back into a single partition. let all_data = Arc::new(UnionExec::new(vec![old_input, new_input])); - let ordered = Arc::new(SortPreservingMergeExec::new(vec![sort_expr], all_data)); + let ordered = Arc::new(SortPreservingMergeExec::new( + LexOrdering::new(vec![sort_expr]), + all_data, + )); let unchunked = execute_plan( ordered, LanceExecutionOptions { diff --git a/rust/lance/src/datafusion/dataframe.rs b/rust/lance/src/datafusion/dataframe.rs index e5b12b006c..f0edd79c77 100644 --- a/rust/lance/src/datafusion/dataframe.rs +++ b/rust/lance/src/datafusion/dataframe.rs @@ -22,6 +22,7 @@ use lance_core::{ROW_ADDR_FIELD, ROW_ID_FIELD}; use crate::Dataset; +#[derive(Debug)] pub struct LanceTableProvider { dataset: Arc, full_schema: Arc, @@ -153,6 +154,14 @@ impl OneShotPartitionStream { } } +impl std::fmt::Debug for OneShotPartitionStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OneShotPartitionStream") + .field("schema", &self.schema) + .finish() + } +} + impl PartitionStream for OneShotPartitionStream { fn schema(&self) -> &SchemaRef { &self.schema diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index 0e638c7470..265c9a2220 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -35,7 +35,7 @@ use datafusion::physical_plan::{ use datafusion::scalar::ScalarValue; use datafusion_expr::Operator; use datafusion_physical_expr::aggregate::AggregateExprBuilder; -use datafusion_physical_expr::{Partitioning, PhysicalExpr}; +use datafusion_physical_expr::{LexOrdering, Partitioning, PhysicalExpr}; use futures::future::BoxFuture; use futures::stream::{Stream, StreamExt}; use futures::{FutureExt, TryStreamExt}; @@ -1049,7 +1049,7 @@ impl Scanner { let count_plan = Arc::new(AggregateExec::try_new( AggregateMode::Single, PhysicalGroupBy::new_single(Vec::new()), - vec![count_expr], + vec![Arc::new(count_expr)], vec![None], plan, plan_schema, @@ -1437,7 +1437,7 @@ impl Scanner { }) }) .collect::>>()?; - plan = Arc::new(SortExec::new(col_exprs, plan)); + plan = Arc::new(SortExec::new(LexOrdering::new(col_exprs), plan)); } // Stage 4: limit / offset @@ -1598,13 +1598,15 @@ impl Scanner { let fts_node = Arc::new(AggregateExec::try_new( AggregateMode::Single, PhysicalGroupBy::new_single(group_expr), - vec![AggregateExprBuilder::new( - functions_aggregate::min_max::max_udaf(), - vec![expressions::col(SCORE_COL, &schema)?], - ) - .schema(schema.clone()) - .alias(SCORE_COL) - .build()?], + vec![Arc::new( + AggregateExprBuilder::new( + functions_aggregate::min_max::max_udaf(), + vec![expressions::col(SCORE_COL, &schema)?], + ) + .schema(schema.clone()) + .alias(SCORE_COL) + .build()?, + )], vec![None], fts_node, schema, @@ -1618,7 +1620,8 @@ impl Scanner { }; Ok(Arc::new( - SortExec::new(vec![sort_expr], fts_node).with_fetch(self.limit.map(|l| l as usize)), + SortExec::new(LexOrdering::new(vec![sort_expr]), fts_node) + .with_fetch(self.limit.map(|l| l as usize)), )) } @@ -2072,13 +2075,13 @@ impl Scanner { // Use DataFusion's [SortExec] for Top-K search let sort = SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: expressions::col(DIST_COL, knn_plan.schema().as_ref())?, options: SortOptions { descending: false, nulls_first: false, }, - }], + }]), knn_plan, ) .with_fetch(Some(q.k)); @@ -2108,7 +2111,7 @@ impl Scanner { }, }; Ok(Arc::new( - SortExec::new(vec![sort_expr], inner_fanout_search) + SortExec::new(LexOrdering::new(vec![sort_expr]), inner_fanout_search) .with_fetch(Some(q.k * q.refine_factor.unwrap_or(1) as usize)), )) } @@ -2152,26 +2155,28 @@ impl Scanner { expressions::col(ROW_ID, schema.as_ref())?, ROW_ID.to_string(), )]; - // for now multivector is always with cosine distance so here convert the distance to `1 - distance`, + // for now multivector is always with cosine distance so here convert the distance to `1 - distance` + // and calculate the sum across all rows with the same row id. + let sum_expr = AggregateExprBuilder::new( + functions_aggregate::sum::sum_udaf(), + vec![expressions::binary( + expressions::lit(1.0), + datafusion_expr::Operator::Minus, + expressions::cast( + expressions::col(DIST_COL, &schema)?, + &schema, + DataType::Float64, + )?, + &schema, + )?], + ) + .schema(schema.clone()) + .alias(DIST_COL) + .build()?; let ann_node: Arc = Arc::new(AggregateExec::try_new( AggregateMode::Single, PhysicalGroupBy::new_single(group_expr), - vec![AggregateExprBuilder::new( - functions_aggregate::sum::sum_udaf(), - vec![expressions::binary( - expressions::lit(1.0), - datafusion_expr::Operator::Minus, - expressions::cast( - expressions::col(DIST_COL, &schema)?, - &schema, - DataType::Float64, - )?, - &schema, - )?], - ) - .schema(schema.clone()) - .alias(DIST_COL) - .build()?], + vec![Arc::new(sum_expr)], vec![None], ann_node, schema, @@ -2185,7 +2190,7 @@ impl Scanner { }, }; let ann_node = Arc::new( - SortExec::new(vec![sort_expr], ann_node) + SortExec::new(LexOrdering::new(vec![sort_expr]), ann_node) .with_fetch(Some(q.k * q.refine_factor.unwrap_or(1) as usize)), ); diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 1d603dec40..ddf6de7d9e 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -31,14 +31,16 @@ use datafusion::{ context::{SessionConfig, SessionContext}, memory_pool::MemoryConsumer, }, - logical_expr::{Expr, JoinType}, + logical_expr::{self, Expr, JoinType}, physical_plan::{ joins::{HashJoinExec, PartitionMode}, + projection::ProjectionExec, repartition::RepartitionExec, stream::RecordBatchStreamAdapter, union::UnionExec, ColumnarValue, ExecutionPlan, PhysicalExpr, SendableRecordBatchStream, }, + prelude::DataFrame, scalar::ScalarValue, }; @@ -512,9 +514,16 @@ impl MergeInsertJob { )?); } + // We need to prefix the fields in the target with target_ so that we don't have any duplicate + // field names (DF doesn't support this as of version 44) + target = Self::prefix_columns_phys(target, "target_"); + // 6 - Finally, join the input (source table) with the taken data (target table) let source_key = Column::new_with_schema(&index_column, shared_input.schema().as_ref())?; - let target_key = Column::new_with_schema(&index_column, target.schema().as_ref())?; + let target_key = Column::new_with_schema( + &format!("target_{}", index_column), + target.schema().as_ref(), + )?; let joined = Arc::new( HashJoinExec::try_new( shared_input, @@ -537,6 +546,38 @@ impl MergeInsertJob { ) } + fn prefix_columns(df: DataFrame, prefix: &str) -> DataFrame { + let schema = df.schema(); + let columns = schema + .fields() + .iter() + .map(|f| { + // Need to "quote" the column name so it gets interpreted case-sensitively + logical_expr::col(format!("\"{}\"", f.name())).alias(format!( + "{}{}", + prefix, + f.name() + )) + }) + .collect::>(); + df.select(columns).unwrap() + } + + fn prefix_columns_phys(inp: Arc, prefix: &str) -> Arc { + let schema = inp.schema(); + let exprs = schema + .fields() + .iter() + .enumerate() + .map(|(idx, f)| { + let col = Arc::new(Column::new(f.name(), idx)) as Arc; + let new_name = format!("{}{}", prefix, f.name()); + (col, new_name) + }) + .collect::>(); + Arc::new(ProjectionExec::try_new(exprs, inp).unwrap()) + } + // If the join keys are not indexed then we need to do a full scan of the table async fn create_full_table_joined_stream( &self, @@ -552,12 +593,21 @@ impl MergeInsertJob { .iter() .map(|c| c.as_str()) .collect::>(); // vector of strings of col names to join + let target_cols = self + .params + .on + .iter() + .map(|c| format!("target_{}", c)) + .collect::>(); + let target_cols = target_cols.iter().map(|s| s.as_str()).collect::>(); match self.check_compatible_schema(&schema)? { SchemaComparison::FullCompatible => { let existing = session_ctx.read_lance(self.dataset.clone(), true, false)?; + // We need to rename the columns from the target table so that they don't conflict with the source table + let existing = Self::prefix_columns(existing, "target_"); let joined = - new_data.join(existing, JoinType::Full, &join_cols, &join_cols, None)?; // full join + new_data.join(existing, JoinType::Full, &join_cols, &target_cols, None)?; // full join Ok(joined.execute_stream().await?) } SchemaComparison::Subschema => { @@ -569,18 +619,27 @@ impl MergeInsertJob { .chain([ROW_ID, ROW_ADDR]) .collect::>(); let projected = existing.select_columns(&columns)?; + // We need to rename the columns from the target table so that they don't conflict with the source table + let projected = Self::prefix_columns(projected, "target_"); // We aren't supporting inserts or deletes right now, so we can use inner join let join_type = if self.params.insert_not_matched { JoinType::Left } else { JoinType::Inner }; - let joined = new_data.join(projected, join_type, &join_cols, &join_cols, None)?; + let joined = new_data.join(projected, join_type, &join_cols, &target_cols, None)?; Ok(joined.execute_stream().await?) } } } + /// Join the source and target data streams + /// + /// If there is a scalar index on the join key, we can use it to do an indexed join. Otherwise we need to do + /// a full outer join. + /// + /// Datafusion doesn't allow duplicate column names so during this join we rename the columns from target and + /// prefix them with _target. async fn create_joined_stream( &self, source: SendableRecordBatchStream, diff --git a/rust/lance/src/io/exec/fts.rs b/rust/lance/src/io/exec/fts.rs index 6984045d4d..c8a7f42f7c 100644 --- a/rust/lance/src/io/exec/fts.rs +++ b/rust/lance/src/io/exec/fts.rs @@ -9,10 +9,9 @@ use arrow_schema::SchemaRef; use datafusion::common::Statistics; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::SendableRecordBatchStream; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; -use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, -}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use futures::stream::{self}; use futures::{StreamExt, TryStreamExt}; @@ -64,7 +63,8 @@ impl FtsExec { let properties = PlanProperties::new( EquivalenceProperties::new(FTS_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Self { dataset, @@ -219,7 +219,8 @@ impl FlatFtsExec { let properties = PlanProperties::new( EquivalenceProperties::new(FTS_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Self { dataset, diff --git a/rust/lance/src/io/exec/knn.rs b/rust/lance/src/io/exec/knn.rs index 87d6afce36..37358897ba 100644 --- a/rust/lance/src/io/exec/knn.rs +++ b/rust/lance/src/io/exec/knn.rs @@ -11,13 +11,16 @@ use arrow_array::{ ArrayRef, RecordBatch, StringArray, }; use arrow_schema::{DataType, Field, Schema, SchemaRef}; -use datafusion::common::stats::Precision; use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::physical_plan::PlanProperties; use datafusion::physical_plan::{ stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; -use datafusion::physical_plan::{ExecutionMode, PlanProperties}; +use datafusion::{ + common::stats::Precision, + physical_plan::execution_plan::{Boundedness, EmissionType}, +}; use datafusion_physical_expr::EquivalenceProperties; use futures::stream::repeat_with; use futures::{future, stream, StreamExt, TryFutureExt, TryStreamExt}; @@ -277,7 +280,8 @@ impl ANNIvfPartitionExec { let properties = PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Ok(Self { @@ -436,7 +440,8 @@ impl ANNIvfSubIndexExec { let properties = PlanProperties::new( EquivalenceProperties::new(KNN_INDEX_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Ok(Self { input, diff --git a/rust/lance/src/io/exec/optimizer.rs b/rust/lance/src/io/exec/optimizer.rs index d84ddf33f6..79dddcb1bf 100644 --- a/rust/lance/src/io/exec/optimizer.rs +++ b/rust/lance/src/io/exec/optimizer.rs @@ -19,6 +19,7 @@ use datafusion::{ use datafusion_physical_expr::{expressions::Column, PhysicalExpr}; /// Rule that eliminates [TakeExec] nodes that are immediately followed by another [TakeExec]. +#[derive(Debug)] pub struct CoalesceTake; impl CoalesceTake { @@ -115,6 +116,7 @@ impl PhysicalOptimizerRule for CoalesceTake { /// Rule that eliminates [ProjectionExec] nodes that projects all columns /// from its input with no additional expressions. +#[derive(Debug)] pub struct SimplifyProjection; impl PhysicalOptimizerRule for SimplifyProjection { diff --git a/rust/lance/src/io/exec/pushdown_scan.rs b/rust/lance/src/io/exec/pushdown_scan.rs index 92bdf48132..ee1a83868d 100644 --- a/rust/lance/src/io/exec/pushdown_scan.rs +++ b/rust/lance/src/io/exec/pushdown_scan.rs @@ -15,7 +15,8 @@ use datafusion::logical_expr::col; use datafusion::logical_expr::interval_arithmetic::{Interval, NullableInterval}; use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}; use datafusion::physical_expr::execution_props::ExecutionProps; -use datafusion::physical_plan::{ColumnarValue, ExecutionMode, PlanProperties}; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion::physical_plan::{ColumnarValue, PlanProperties}; use datafusion::scalar::ScalarValue; use datafusion::{ physical_plan::{ @@ -131,7 +132,8 @@ impl LancePushdownScanExec { let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Ok(Self { diff --git a/rust/lance/src/io/exec/scalar_index.rs b/rust/lance/src/io/exec/scalar_index.rs index 319b4870af..024be132b2 100644 --- a/rust/lance/src/io/exec/scalar_index.rs +++ b/rust/lance/src/io/exec/scalar_index.rs @@ -9,8 +9,9 @@ use async_trait::async_trait; use datafusion::{ common::{stats::Precision, Statistics}, physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionMode, - ExecutionPlan, Partitioning, PlanProperties, + execution_plan::{Boundedness, EmissionType}, + stream::RecordBatchStreamAdapter, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, }, scalar::ScalarValue, }; @@ -89,7 +90,8 @@ impl ScalarIndexExec { let properties = PlanProperties::new( EquivalenceProperties::new(SCALAR_INDEX_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Self { dataset, @@ -190,7 +192,8 @@ impl MapIndexExec { let properties = PlanProperties::new( EquivalenceProperties::new(INDEX_LOOKUP_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Self { dataset, @@ -394,7 +397,8 @@ impl MaterializeIndexExec { let properties = PlanProperties::new( EquivalenceProperties::new(MATERIALIZE_INDEX_SCHEMA.clone()), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Self { dataset, diff --git a/rust/lance/src/io/exec/scan.rs b/rust/lance/src/io/exec/scan.rs index 9cd6ac825f..662c0c8167 100644 --- a/rust/lance/src/io/exec/scan.rs +++ b/rust/lance/src/io/exec/scan.rs @@ -11,6 +11,7 @@ use arrow_array::RecordBatch; use arrow_schema::{Schema as ArrowSchema, SchemaRef}; use datafusion::common::stats::Precision; use datafusion::error::{DataFusionError, Result}; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, @@ -476,7 +477,8 @@ impl LanceScanExec { let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::RoundRobinBatch(1), - datafusion::physical_plan::ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Self { dataset, diff --git a/rust/lance/src/io/exec/testing.rs b/rust/lance/src/io/exec/testing.rs index 2864acde03..611cf5480b 100644 --- a/rust/lance/src/io/exec/testing.rs +++ b/rust/lance/src/io/exec/testing.rs @@ -12,8 +12,8 @@ use datafusion::{ common::Statistics, execution::context::TaskContext, physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, - SendableRecordBatchStream, + execution_plan::{Boundedness, EmissionType}, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SendableRecordBatchStream, }, }; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; @@ -29,7 +29,8 @@ impl TestingExec { let properties = PlanProperties::new( EquivalenceProperties::new(batches[0].schema()), Partitioning::RoundRobinBatch(1), - ExecutionMode::Bounded, + EmissionType::Incremental, + Boundedness::Bounded, ); Self { batches,