From ddb8c72505b08c1663db3970f52a8597b2915d57 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Sat, 9 Mar 2024 22:44:45 +0000 Subject: [PATCH 1/2] fix --- pyiceberg/manifest.py | 1 + tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 0504626d07..146c3ea45c 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -308,6 +308,7 @@ def data_file_with_partition(partition_type: StructType, format_version: Literal field_id=field.field_id, name=field.name, field_type=partition_field_to_data_file_partition_field(field.field_type), + required=False, ) for field in partition_type.fields ]) diff --git a/tests/conftest.py b/tests/conftest.py index a005966ea5..e090e7c020 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -892,7 +892,7 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str: "data_file": { "file_path": "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=1/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00002.parquet", "file_format": "PARQUET", - "partition": {"VendorID": 1, "tpep_pickup_datetime": 1925}, + "partition": {"VendorID": 1, "tpep_pickup_datetime": None}, "record_count": 95050, "file_size_in_bytes": 1265950, "block_size_in_bytes": 67108864, From 010fc36abf63eaac3a7fb5135b177addc34f17a0 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Sun, 10 Mar 2024 15:32:16 +0000 Subject: [PATCH 2/2] use partition field nullability --- pyiceberg/manifest.py | 2 +- pyiceberg/partitioning.py | 3 ++- tests/table/test_partitioning.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 146c3ea45c..03dc3199bf 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -308,7 +308,7 @@ def data_file_with_partition(partition_type: StructType, format_version: Literal field_id=field.field_id, name=field.name, field_type=partition_field_to_data_file_partition_field(field.field_type), - required=False, + required=field.required, ) for field in partition_type.fields ]) diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index 6fa0286282..a6692b325e 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -218,7 +218,8 @@ def partition_type(self, schema: Schema) -> StructType: for field in self.fields: source_type = schema.find_type(field.source_id) result_type = field.transform.result_type(source_type) - nested_fields.append(NestedField(field.field_id, field.name, result_type, required=False)) + required = schema.find_field(field.source_id).required + nested_fields.append(NestedField(field.field_id, field.name, result_type, required=required)) return StructType(*nested_fields) def partition_to_path(self, data: Record, schema: Schema) -> str: diff --git a/tests/table/test_partitioning.py b/tests/table/test_partitioning.py index cb60c9a8e5..d7425bc351 100644 --- a/tests/table/test_partitioning.py +++ b/tests/table/test_partitioning.py @@ -127,5 +127,5 @@ def test_partition_type(table_schema_simple: Schema) -> None: assert spec.partition_type(table_schema_simple) == StructType( NestedField(field_id=1000, name="str_truncate", field_type=StringType(), required=False), - NestedField(field_id=1001, name="int_bucket", field_type=IntegerType(), required=False), + NestedField(field_id=1001, name="int_bucket", field_type=IntegerType(), required=True), )