Skip to content

Commit

Permalink
Introduce binary_as_string parquet option, upgrade to arrow/parquet `53.2.0` (#12816)
Browse files Browse the repository at this point in the history

* Update to arrow-rs 53.2.0

* introduce binary_as_string parquet option

* Fix test

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
goldmedal and alamb authored Oct 25, 2024
1 parent 6a3c0b0 commit 13a4225
Show file tree
Hide file tree
Showing 18 changed files with 581 additions and 244 deletions.
18 changes: 9 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,22 @@ version = "42.1.0"
ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
arrow = { version = "53.1.0", features = [
arrow = { version = "53.2.0", features = [
"prettyprint",
] }
arrow-array = { version = "53.1.0", default-features = false, features = [
arrow-array = { version = "53.2.0", default-features = false, features = [
"chrono-tz",
] }
arrow-buffer = { version = "53.1.0", default-features = false }
arrow-flight = { version = "53.1.0", features = [
arrow-buffer = { version = "53.2.0", default-features = false }
arrow-flight = { version = "53.2.0", features = [
"flight-sql-experimental",
] }
arrow-ipc = { version = "53.1.0", default-features = false, features = [
arrow-ipc = { version = "53.2.0", default-features = false, features = [
"lz4",
] }
arrow-ord = { version = "53.1.0", default-features = false }
arrow-schema = { version = "53.1.0", default-features = false }
arrow-string = { version = "53.1.0", default-features = false }
arrow-ord = { version = "53.2.0", default-features = false }
arrow-schema = { version = "53.2.0", default-features = false }
arrow-string = { version = "53.2.0", default-features = false }
async-trait = "0.1.73"
bigdecimal = "=0.4.1"
bytes = "1.4"
Expand Down Expand Up @@ -126,7 +126,7 @@ log = "^0.4"
num_cpus = "1.13.0"
object_store = { version = "0.11.0", default-features = false }
parking_lot = "0.12"
parquet = { version = "53.1.0", default-features = false, features = [
parquet = { version = "53.2.0", default-features = false, features = [
"arrow",
"async",
"object_store",
Expand Down
15 changes: 9 additions & 6 deletions benchmarks/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,15 @@ impl RunOpt {
None => queries.min_query_id()..=queries.max_query_id(),
};

// configure parquet options
let mut config = self.common.config();
config
.options_mut()
.execution
.parquet
.schema_force_view_types = self.common.force_view_types;
{
let parquet_options = &mut config.options_mut().execution.parquet;
parquet_options.schema_force_view_types = self.common.force_view_types;
// The hits_partitioned dataset specifies string columns
// as binary due to how it was written. Force it to strings
parquet_options.binary_as_string = true;
}

let ctx = SessionContext::new_with_config(config);
self.register_hits(&ctx).await?;
Expand Down Expand Up @@ -148,7 +151,7 @@ impl RunOpt {
Ok(())
}

/// Registrs the `hits.parquet` as a table named `hits`
/// Registers the `hits.parquet` as a table named `hits`
async fn register_hits(&self, ctx: &SessionContext) -> Result<()> {
let options = Default::default();
let path = self.path.as_os_str().to_str().unwrap();
Expand Down
Loading

0 comments on commit 13a4225

Please sign in to comment.