From 936e40eb72adb8dc63f6aa67ec94885598408e53 Mon Sep 17 00:00:00 2001
From: Ben Kirwin
Date: Mon, 21 Oct 2024 16:19:03 -0400
Subject: [PATCH] Skip writing down null buffers for non-nullable primitive
 arrays (#6524)

* Add a new roundtrip test and improve validations

The new validations should help improve coverage from the existing tests,
but none of them currently cover the null-masking bug.

* Don't return nulls for non-nullable cols in reader

* Coverage for more non-null nested leaf types

* Update parquet/src/arrow/arrow_writer/mod.rs

---------

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
---
 parquet/src/arrow/arrow_writer/mod.rs  | 85 +++++++++++++++++++++++++-
 parquet/src/arrow/record_reader/mod.rs | 21 +++++--
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index 3ec7a3dfea36..99d54eef3bb5 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -1614,6 +1614,85 @@ mod tests {
         roundtrip(batch, Some(SMALL_SIZE / 2));
     }
 
+    #[test]
+    fn arrow_writer_2_level_struct_mixed_null_2() {
+        // tests writing <struct<struct<primitive>>>, where the primitive columns are non-null.
+        let field_c = Field::new("c", DataType::Int32, false);
+        let field_d = Field::new("d", DataType::FixedSizeBinary(4), false);
+        let field_e = Field::new(
+            "e",
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            false,
+        );
+
+        let field_b = Field::new(
+            "b",
+            DataType::Struct(vec![field_c, field_d, field_e].into()),
+            false,
+        );
+        let type_a = DataType::Struct(vec![field_b.clone()].into());
+        let field_a = Field::new("a", type_a, true);
+        let schema = Schema::new(vec![field_a.clone()]);
+
+        // create data
+        let c = Int32Array::from_iter_values(0..6);
+        let d = FixedSizeBinaryArray::try_from_iter(
+            ["aaaa", "bbbb", "cccc", "dddd", "eeee", "ffff"].into_iter(),
+        )
+        .expect("four byte values");
+        let e = Int32DictionaryArray::from_iter(["one", "two", "three", "four", "five", "one"]);
+        let b_data = ArrayDataBuilder::new(field_b.data_type().clone())
+            .len(6)
+            .add_child_data(c.into_data())
+            .add_child_data(d.into_data())
+            .add_child_data(e.into_data())
+            .build()
+            .unwrap();
+        let b = StructArray::from(b_data);
+        let a_data = ArrayDataBuilder::new(field_a.data_type().clone())
+            .len(6)
+            .null_bit_buffer(Some(Buffer::from([0b00100101])))
+            .add_child_data(b.into_data())
+            .build()
+            .unwrap();
+        let a = StructArray::from(a_data);
+
+        assert_eq!(a.null_count(), 3);
+        assert_eq!(a.column(0).null_count(), 0);
+
+        // build a record batch
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();
+
+        roundtrip(batch, Some(SMALL_SIZE / 2));
+    }
+
+    #[test]
+    fn test_empty_dict() {
+        let struct_fields = Fields::from(vec![Field::new(
+            "dict",
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            false,
+        )]);
+
+        let schema = Schema::new(vec![Field::new_struct(
+            "struct",
+            struct_fields.clone(),
+            true,
+        )]);
+        let dictionary = Arc::new(DictionaryArray::new(
+            Int32Array::new_null(5),
+            Arc::new(StringArray::new_null(0)),
+        ));
+
+        let s = StructArray::new(
+            struct_fields,
+            vec![dictionary],
+            Some(NullBuffer::new_null(5)),
+        );
+
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(s)]).unwrap();
+        roundtrip(batch, None);
+    }
     #[test]
     fn arrow_writer_page_size() {
         let schema = Arc::new(Schema::new(vec![Field::new("col", DataType::Utf8, false)]));
@@ -1735,7 +1814,11 @@ mod tests {
     }
 
     fn roundtrip_opts(expected_batch: &RecordBatch, props: WriterProperties) -> File {
-        roundtrip_opts_with_array_validation(expected_batch, props, |a, b| assert_eq!(a, b))
+        roundtrip_opts_with_array_validation(expected_batch, props, |a, b| {
+            a.validate_full().expect("valid expected data");
+            b.validate_full().expect("valid actual data");
+            assert_eq!(a, b)
+        })
     }
 
     struct RoundTripOptions {

diff --git a/parquet/src/arrow/record_reader/mod.rs b/parquet/src/arrow/record_reader/mod.rs
index 7456da053b9c..fbcb1069e49c 100644
--- a/parquet/src/arrow/record_reader/mod.rs
+++ b/parquet/src/arrow/record_reader/mod.rs
@@ -172,7 +172,8 @@ where
         std::mem::take(&mut self.values)
     }
 
-    /// Returns currently stored null bitmap data.
+    /// Returns currently stored null bitmap data for nullable columns.
+    /// For non-nullable columns, the bitmap is discarded.
    /// The side effect is similar to `consume_def_levels`.
     pub fn consume_bitmap_buffer(&mut self) -> Option<Buffer> {
         self.consume_bitmap()
     }
@@ -186,11 +187,23 @@ where
         self.num_records = 0;
     }
 
-    /// Returns bitmap data.
+    /// Returns bitmap data for nullable columns.
+    /// For non-nullable columns, the bitmap is discarded.
     pub fn consume_bitmap(&mut self) -> Option<Buffer> {
-        self.def_levels
+        let mask = self
+            .def_levels
             .as_mut()
-            .map(|levels| levels.consume_bitmask())
+            .map(|levels| levels.consume_bitmask());
+
+        // While we always consume the bitmask here, we only want to return
+        // the bitmask for nullable arrays. (Marking nulls on a non-nullable
+        // array may fail validations, even if those nulls are masked off at
+        // a higher level.)
+        if self.column_desc.self_type().is_optional() {
+            mask
+        } else {
+            None
+        }
     }
 
     /// Try to read one batch of data returning the number of records read
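
To see the failure mode this patch addresses outside the test suite: previously the record reader handed back a null bitmap derived from definition levels even for non-nullable leaf columns nested under a nullable parent, so the reconstructed child array could carry spurious nulls that are only masked off at the struct level (and that can fail validation, most visibly for dictionary keys). Below is a minimal standalone sketch of the round trip, not part of the patch: it assumes the `arrow` and `parquet` crates plus `bytes`, and the schema and field names are made up for illustration.

```rust
use std::sync::Arc;

use arrow::array::{Array, AsArray, Int32Array, StructArray};
use arrow::buffer::NullBuffer;
use arrow::datatypes::{DataType, Field, Fields, Schema};
use arrow::record_batch::RecordBatch;
use bytes::Bytes;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ArrowWriter;

fn main() {
    // Nullable struct "a" wrapping a non-nullable Int32 child "c". The child
    // carries no null buffer of its own; its slots under null parents are
    // masked off at the struct level only.
    let child_fields = Fields::from(vec![Field::new("c", DataType::Int32, false)]);
    let field_a = Field::new("a", DataType::Struct(child_fields.clone()), true);
    let schema = Arc::new(Schema::new(vec![field_a]));

    let c = Int32Array::from(vec![1, 2, 3, 4]);
    let struct_nulls = NullBuffer::from(vec![true, false, true, false]);
    let a = StructArray::new(child_fields, vec![Arc::new(c)], Some(struct_nulls));
    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(a)]).unwrap();

    // Round-trip through an in-memory Parquet file.
    let mut buf = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buf, schema, None).unwrap();
    writer.write(&batch).unwrap();
    writer.close().unwrap();

    let reader = ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buf))
        .unwrap()
        .build()
        .unwrap();
    for batch in reader {
        let batch = batch.unwrap();
        let a = batch.column(0).as_struct();
        // The struct's own nulls survive the round trip...
        assert_eq!(a.null_count(), 2);
        // ...but the non-nullable child must come back with no nulls of its
        // own. Before this patch the reader could attach a spurious null
        // buffer here, built from the definition levels.
        assert_eq!(a.column(0).null_count(), 0);
        batch.column(0).to_data().validate_full().unwrap();
    }
}
```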