skip iterating if there are no row groups after prune
bchalk101 committed Jan 23, 2024
1 parent 2cd3cf8 commit a8c771b
Showing 2 changed files with 13 additions and 2 deletions.
11 changes: 9 additions & 2 deletions crates/polars-io/src/parquet/read.rs
@@ -367,15 +367,22 @@ impl ParquetAsyncReader {

     pub async fn finish(mut self) -> PolarsResult<DataFrame> {
         let rechunk = self.rechunk;
-        let metadata = self.get_metadata().await?.clone();
         let reader_schema = self.schema().await?;
         let row_index = self.row_index.clone();
         let hive_partition_columns = self.hive_partition_columns.clone();
         let projection = self.projection.clone();

         // batched reader deals with slice pushdown
         let reader = self.batched(usize::MAX).await?;
-        let n_batches = metadata.row_groups.len();
+        let n_batches = reader.num_row_groups();
+        if n_batches == 0 {
+            return Ok(materialize_empty_df(
+                projection.as_deref(),
+                reader_schema.as_ref(),
+                hive_partition_columns.as_deref(),
+                row_index.as_ref(),
+            ));
+        }
         let mut iter = reader.iter(n_batches);

         let mut chunks = Vec::with_capacity(n_batches);
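The guard added to finish() can be read in isolation: once the batched reader has applied its pruning, a row-group count of zero means there is nothing to iterate, so an empty frame with the expected shape is returned immediately. Below is a minimal, self-contained sketch of that pattern using hypothetical stand-in types (RowGroupMeta, StubReader, Frame) rather than the real polars-io internals; materialize_empty_df and the projection/hive/row-index plumbing are elided.

// Minimal sketch of the early-return guard; all types here are illustrative
// stand-ins, not polars-io API.
struct RowGroupMeta {
    num_rows: usize,
}

struct StubReader {
    // Row groups remaining after predicate/slice pruning.
    row_group_metadata: Vec<RowGroupMeta>,
}

#[derive(Debug)]
struct Frame {
    rows: usize,
}

impl StubReader {
    // Mirrors the new num_row_groups() accessor: the count after pruning.
    fn num_row_groups(&self) -> usize {
        self.row_group_metadata.len()
    }

    fn read_all(&self) -> Frame {
        // Stand-in for iterating the remaining row groups into chunks.
        let rows = self.row_group_metadata.iter().map(|rg| rg.num_rows).sum();
        Frame { rows }
    }
}

fn finish(reader: StubReader) -> Frame {
    let n_batches = reader.num_row_groups();
    // The commit's guard: if pruning removed every row group, skip the
    // batch loop entirely and return an empty frame straight away.
    if n_batches == 0 {
        return Frame { rows: 0 };
    }
    reader.read_all()
}

fn main() {
    let pruned = StubReader { row_group_metadata: vec![] };
    println!("{:?}", finish(pruned)); // Frame { rows: 0 }
}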
4 changes: 4 additions & 0 deletions crates/polars-io/src/parquet/read_impl.rs
@@ -578,6 +578,10 @@ impl BatchedParquetReader {
         self.limit == 0
     }

+    pub fn num_row_groups(&self) -> usize {
+        self.row_group_metadata.len()
+    }
+
     pub fn schema(&self) -> &ArrowSchemaRef {
         &self.schema
     }
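Taken together with the finish() change above, the new accessor lets the early return key off the reader's own row-group list rather than the file metadata used before. That list presumably holds only the row groups that survived predicate/slice pruning when the batched reader was built (the "after prune" in the commit title), whereas metadata.row_groups.len() always reported the full count in the file.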
