diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 4fa04aa15031..1f3078224159 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -101,6 +101,7 @@ struct LevelContext {
 }
 
 /// A helper to construct [`ArrayLevels`] from a potentially nested [`Field`]
+#[derive(Debug)]
 enum LevelInfoBuilder {
     /// A primitive, leaf array
     Primitive(ArrayLevels),
@@ -132,7 +133,15 @@ enum LevelInfoBuilder {
 impl LevelInfoBuilder {
     /// Create a new [`LevelInfoBuilder`] for the given [`Field`] and parent [`LevelContext`]
     fn try_new(field: &Field, parent_ctx: LevelContext, array: &ArrayRef) -> Result<Self> {
-        assert_eq!(field.data_type(), array.data_type());
+        if field.data_type() != array.data_type() {
+            return Err(arrow_err!(format!(
+                "Incompatible type. Field '{}' has type {}, array has type {}",
+                field.name(),
+                field.data_type(),
+                array.data_type(),
+            )));
+        }
+
         let is_nullable = field.is_nullable();
 
         match array.data_type() {
@@ -1835,6 +1844,21 @@ mod tests {
         assert_eq!(levels[0], expected_level);
     }
 
+    #[test]
+    fn mismatched_types() {
+        let array = Arc::new(Int32Array::from_iter(0..10)) as ArrayRef;
+        let field = Field::new("item", DataType::Float64, false);
+
+        let err = LevelInfoBuilder::try_new(&field, Default::default(), &array)
+            .unwrap_err()
+            .to_string();
+
+        assert_eq!(
+            err,
+            "Arrow: Incompatible type. Field 'item' has type Float64, array has type Int32",
+        );
+    }
+
     fn levels<T: Array + 'static>(field: &Field, array: T) -> LevelInfoBuilder {
         let v = Arc::new(array) as ArrayRef;
         LevelInfoBuilder::try_new(field, Default::default(), &v).unwrap()
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index d9771838ada6..f3f190c01f75 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -183,7 +183,9 @@ impl ArrowWriter {
     ///
     /// If this would cause the current row group to exceed [`WriterProperties::max_row_group_size`]
     /// rows, the contents of `batch` will be written to one or more row groups such that all but
-    /// the final row group in the file contain [`WriterProperties::max_row_group_size`] rows
+    /// the final row group in the file contain [`WriterProperties::max_row_group_size`] rows.
+    ///
+    /// This will fail if the `batch`'s schema does not match the writer's schema.
     pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
         if batch.num_rows() == 0 {
             return Ok(());
@@ -2963,4 +2965,29 @@ mod tests {
             .any(|kv| kv.key.as_str() == ARROW_SCHEMA_META_KEY));
         }
     }
+
+    #[test]
+    fn mismatched_schemas() {
+        let batch_schema = Schema::new(vec![Field::new("count", DataType::Int32, false)]);
+        let file_schema = Arc::new(Schema::new(vec![Field::new(
+            "temperature",
+            DataType::Float64,
+            false,
+        )]));
+
+        let batch = RecordBatch::try_new(
+            Arc::new(batch_schema),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4])) as _],
+        )
+        .unwrap();
+
+        let mut buf = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buf, file_schema.clone(), None).unwrap();
+
+        let err = writer.write(&batch).unwrap_err().to_string();
+        assert_eq!(
+            err,
+            "Arrow: Incompatible type. Field 'temperature' has type Float64, array has type Int32"
+        );
+    }
 }
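With this patch, a schema mismatch surfaces as an `Err` from `ArrowWriter::write` instead of a panic inside level computation. The following is a minimal sketch of how a caller might observe the new error; it mirrors the schemas used in the `mismatched_schemas` test above, and the identifiers (`file_schema`, `batch_schema`, the column names) are illustrative rather than part of this patch.

```rust
use std::sync::Arc;

use arrow::array::Int32Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use parquet::arrow::ArrowWriter;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The writer is created with a Float64 "temperature" column ...
    let file_schema = Arc::new(Schema::new(vec![Field::new(
        "temperature",
        DataType::Float64,
        false,
    )]));

    // ... but the batch carries an Int32 "count" column instead.
    let batch_schema = Arc::new(Schema::new(vec![Field::new("count", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        batch_schema,
        vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as _],
    )?;

    let mut buf = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buf, file_schema, None)?;

    // After this change the mismatch is reported as an error the caller can
    // handle, rather than a panic from the assert_eq! that was removed above.
    if let Err(e) = writer.write(&batch) {
        eprintln!("failed to write batch: {e}");
    }

    writer.close()?;
    Ok(())
}
```

The "Arrow: " prefix seen in the test assertions comes from the `ParquetError` variant produced by the `arrow_err!` macro, which is how the new check reports the mismatch.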