From 71ce6d421b2fbbb2be98ed6dd8d8b69b49b3dd73 Mon Sep 17 00:00:00 2001 From: ritchie Date: Thu, 18 Jan 2024 19:08:46 +0100 Subject: [PATCH] rust tests --- crates/polars-core/src/datatypes/any_value.rs | 4 ++-- crates/polars-io/src/csv/read_impl/batched_mmap.rs | 2 +- crates/polars-io/src/csv/read_impl/batched_read.rs | 2 +- crates/polars-io/src/csv/read_impl/mod.rs | 11 +++++++---- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 4ea8d66c968b..e8afae9568df 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -1194,8 +1194,8 @@ mod test { ), (ArrowDataType::LargeUtf8, DataType::String), (ArrowDataType::Utf8, DataType::String), - (ArrowDataType::LargeBinary, DataType::BinaryOffset), - (ArrowDataType::Binary, DataType::BinaryOffset), + (ArrowDataType::LargeBinary, DataType::Binary), + (ArrowDataType::Binary, DataType::Binary), ( ArrowDataType::Time64(ArrowTimeUnit::Nanosecond), DataType::Time, diff --git a/crates/polars-io/src/csv/read_impl/batched_mmap.rs b/crates/polars-io/src/csv/read_impl/batched_mmap.rs index a34a7b18a00e..4730adca4156 100644 --- a/crates/polars-io/src/csv/read_impl/batched_mmap.rs +++ b/crates/polars-io/src/csv/read_impl/batched_mmap.rs @@ -129,7 +129,7 @@ impl<'a> CoreReader<'a> { eol_char: self.eol_char, }; - let projection = self.get_projection(); + let projection = self.get_projection()?; // RAII structure that will ensure we maintain a global stringcache #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-io/src/csv/read_impl/batched_read.rs b/crates/polars-io/src/csv/read_impl/batched_read.rs index 6247fdaa1d2f..7c7f8ea56c1c 100644 --- a/crates/polars-io/src/csv/read_impl/batched_read.rs +++ b/crates/polars-io/src/csv/read_impl/batched_read.rs @@ -212,7 +212,7 @@ impl<'a> CoreReader<'a> { 4096, ); - let projection = self.get_projection(); + let projection 
= self.get_projection()?; // RAII structure that will ensure we maintain a global stringcache #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-io/src/csv/read_impl/mod.rs b/crates/polars-io/src/csv/read_impl/mod.rs index 69e0072c1843..db268f92147c 100644 --- a/crates/polars-io/src/csv/read_impl/mod.rs +++ b/crates/polars-io/src/csv/read_impl/mod.rs @@ -436,16 +436,19 @@ impl<'a> CoreReader<'a> { remaining_bytes, )) } - fn get_projection(&mut self) -> Vec<usize> { + fn get_projection(&mut self) -> PolarsResult<Vec<usize>> { // we also need to sort the projection to have predictable output. // the `parse_lines` function expects this. self.projection .take() .map(|mut v| { v.sort_unstable(); - v + if let Some(idx) = v.last() { + polars_ensure!(*idx < self.schema.len(), OutOfBounds: "projection index: {} is out of bounds for csv schema with length: {}", idx, self.schema.len()) + } + Ok(v) }) - .unwrap_or_else(|| (0..self.schema.len()).collect()) + .unwrap_or_else(|| Ok((0..self.schema.len()).collect())) } fn parse_csv( @@ -457,7 +460,7 @@ impl<'a> CoreReader<'a> { let logging = verbose(); let (file_chunks, chunk_size, total_rows, starting_point_offset, bytes, remaining_bytes) = self.determine_file_chunks_and_statistics(&mut n_threads, bytes, logging)?; - let projection = self.get_projection(); + let projection = self.get_projection()?; // An empty file with a schema should return an empty DataFrame with that schema if bytes.is_empty() {