diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 7bc9c9f14715..20f5ff15749b 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -1021,7 +1021,11 @@ def array_flatten(op, **kw): .then(None) .when(result.list.len() == 0) .then([]) - .otherwise(result.flatten()) + # polars doesn't have an efficient API (yet?) for removing one level of + # nesting from an array so we use elementwise evaluation + # + # https://github.com/ibis-project/ibis/issues/10135 + .otherwise(result.list.eval(pl.element().flatten())) ) diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index cc21b1582850..436f74dab71e 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -103,6 +103,7 @@ TrinoUserError = None try: + from psycopg2.errors import ArraySubscriptError as PsycoPg2ArraySubscriptError from psycopg2.errors import DivisionByZero as PsycoPg2DivisionByZero from psycopg2.errors import IndeterminateDatatype as PsycoPg2IndeterminateDatatype from psycopg2.errors import InternalError_ as PsycoPg2InternalError @@ -118,7 +119,7 @@ PsycoPg2InvalidTextRepresentation ) = PsycoPg2DivisionByZero = PsycoPg2InternalError = PsycoPg2ProgrammingError = ( PsycoPg2OperationalError - ) = PsycoPg2UndefinedObject = None + ) = PsycoPg2UndefinedObject = PsycoPg2ArraySubscriptError = None try: from MySQLdb import NotSupportedError as MySQLNotSupportedError diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 3b440feb2293..cba8ae64d1aa 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -21,6 +21,7 @@ GoogleBadRequest, MySQLOperationalError, PolarsComputeError, + PsycoPg2ArraySubscriptError, PsycoPg2IndeterminateDatatype, PsycoPg2InternalError, PsycoPg2ProgrammingError, @@ -1006,6 +1007,11 @@ def flatten_data(): reason="Arrays are never nullable", raises=AssertionError, ), + pytest.mark.notyet( + ["polars"], + 
reason="flattened empty arrays incorrectly insert a null", + raises=AssertionError, + ), ], ), ], @@ -1557,3 +1563,19 @@ def test_array_agg_bool(con, data, agg, baseline_func): result = [x if pd.notna(x) else None for x in result] expected = [baseline_func(x) for x in df.x] assert result == expected + + +@pytest.mark.notyet( + ["postgres"], + raises=PsycoPg2ArraySubscriptError, + reason="all dimensions must match in size", +) +@pytest.mark.notimpl(["risingwave", "flink"], raises=com.OperationNotDefinedError) +def test_flatten(con): + t = ibis.memtable( + [{"arr": [[1, 5, 7], [3, 4]]}], schema={"arr": "array<array<int64>>"} + ) + expr = t.arr.flatten().name("result") + result = con.execute(expr) + expected = pd.Series([[1, 5, 7, 3, 4]], name="result") + tm.assert_series_equal(result, expected)