Skip to content

Commit

Permalink
Arrow: Support Large Binary when using to_arrow (#409)
Browse files Browse the repository at this point in the history
* Arrow: Support Large Binary

* Merge with binary

---------

Co-authored-by: Fokko Driesprong <[email protected]>
  • Loading branch information
castedice and Fokko authored Feb 10, 2024
1 parent 2e67308 commit a576fc9
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def visit_uuid(self, _: UUIDType) -> pa.DataType:
return pa.binary(16)

def visit_binary(self, _: BinaryType) -> pa.DataType:
- return pa.binary()
+ return pa.large_binary()


def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar:
Expand Down Expand Up @@ -882,7 +882,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
return TimestamptzType()
elif primitive.tz is None:
return TimestampType()
- elif pa.types.is_binary(primitive):
+ elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive):
return BinaryType()
elif pa.types.is_fixed_size_binary(primitive):
primitive = cast(pa.FixedSizeBinaryType, primitive)
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def pa_schema() -> pa.Schema:
# ("time", pa.time64("us")),
# Not natively supported by Arrow
# ("uuid", pa.fixed(16)),
- ("binary", pa.binary()),
+ ("binary", pa.large_binary()),
("fixed", pa.binary(16)),
])

Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ def test_string_type_to_pyarrow() -> None:

def test_binary_type_to_pyarrow() -> None:
iceberg_type = BinaryType()
- assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.binary()
+ assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()


def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_pyarrow_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def test_pyarrow_string_to_iceberg() -> None:


def test_pyarrow_variable_binary_to_iceberg() -> None:
- pyarrow_type = pa.binary()
+ pyarrow_type = pa.large_binary()
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
assert converted_iceberg_type == BinaryType()
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
Expand Down

0 comments on commit a576fc9

Please sign in to comment.