Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed dropping the geometry column #322

Merged
merged 7 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ Packaging:
- `dask>=2025.1.0` is now required.
- `python>=3.10` is now required.

Bug fixes:

- Fixed `GeoDataFrame.drop` returning a `GeoDataFrame`
  instead of a `DataFrame` when dropping the geometry
  column (#321).

Version 0.4.2 (September 24, 2024)
----------------------------------
Expand Down
24 changes: 24 additions & 0 deletions dask_geopandas/_expr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Literal

import dask.dataframe.dask_expr as dx

import geopandas


def _drop(df: geopandas.GeoDataFrame, columns, errors):
    """Drop ``columns`` from ``df`` and return the result of ``df.drop``.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Frame to drop columns from.
    columns
        Forwarded to ``df.drop`` as the ``columns`` keyword.
    errors
        Forwarded to ``df.drop`` as the ``errors`` keyword.
    """
    # ``inplace`` is not passed, so the caller receives the object that
    # ``df.drop`` returns rather than a mutated ``df``.
    dropped = df.drop(columns=columns, errors=errors)
    return dropped


def _validate_axis(axis=0, none_is_zero: bool = True) -> None | Literal[0, 1]:
if axis not in (0, 1, "index", "columns", None):
raise ValueError(f"No axis named {axis}")
# convert to numeric axis
numeric_axis: dict[str | None, Literal[0, 1]] = {"index": 0, "columns": 1}
if none_is_zero:
numeric_axis[None] = 0

return numeric_axis.get(axis, axis)


class Drop(dx.expr.Drop):
    """``dx.expr.Drop`` variant whose ``operation`` is the module-level ``_drop``.

    ``_drop`` calls ``df.drop(columns=..., errors=...)`` and returns the
    result, instead of whatever operation the parent expression defines.
    """

    # Wrapped in ``staticmethod`` so the plain function is not bound as a method.
    operation = staticmethod(_drop)
20 changes: 20 additions & 0 deletions dask_geopandas/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import dask_geopandas

from ._expr import Drop, _validate_axis
from .geohash import _geohash
from .hilbert_distance import _hilbert_distance
from .morton_distance import _morton_distance
Expand Down Expand Up @@ -868,6 +869,25 @@ def explode(self, column=None, ignore_index=False, index_parts=None):
enforce_metadata=False,
)

@derived_from(geopandas.GeoDataFrame)
def drop(self, labels=None, axis=0, columns=None, errors="raise"):
    # https://github.com/geopandas/dask-geopandas/issues/321
    # Override to avoid an in-place drop, since we need to convert
    # from a GeoDataFrame to a DataFrame when dropping the geometry
    # column.
    #
    # Raises TypeError when neither ``labels`` nor ``columns`` is given,
    # ValueError (from ``_validate_axis``) for an unknown axis, and
    # NotImplementedError for a row-wise drop without ``columns``.
    if columns is None and labels is None:
        raise TypeError("must either specify 'columns' or 'labels'")

    axis = _validate_axis(axis)

    if axis == 1:
        # Compare against None explicitly instead of relying on truthiness:
        # ``labels or columns`` discards valid falsy labels (a column named
        # 0 or "", or an empty list) and raises on array-like labels whose
        # truth value is ambiguous (e.g. a pandas Index or numpy array).
        if labels is not None:
            columns = labels
    elif axis == 0 and columns is None:
        raise NotImplementedError(
            "Drop currently only works for axis=1 or when columns is not None"
        )
    return new_collection(Drop(self, columns=columns, errors=errors))


from_geopandas = dd.from_pandas

Expand Down
15 changes: 15 additions & 0 deletions dask_geopandas/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,21 @@ def get_chunk(n):
assert_geodataframe_equal(ddf.compute(), expected)


def test_drop():
    """``drop`` returns a plain dask DataFrame only when geometry is removed."""
    # https://github.com/geopandas/dask-geopandas/issues/321
    gdf = geopandas.GeoDataFrame({"col": [1], "geometry": [Point(1, 1)]})
    ddf = dask_geopandas.from_geopandas(gdf, npartitions=1)

    # Dropping the geometry column must downgrade to a regular DataFrame.
    without_geometry = ddf.drop(columns="geometry")
    assert type(without_geometry) is dd.DataFrame

    # Dropping any other column keeps the GeoDataFrame type.
    without_col = ddf.drop(columns="col")
    assert type(without_col) is dask_geopandas.GeoDataFrame

    # Unknown axis names are rejected.
    with pytest.raises(ValueError, match="No axis named x"):
        ddf.drop(labels="a", axis="x")


def test_core_deprecated():
with pytest.warns(FutureWarning, match="dask_geopandas.core"):
import dask_geopandas.core # noqa: F401
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,6 @@ section-order = [
]

[tool.ruff.lint.isort.sections]
"dask" = ["dask", "dask_expr"]
"dask" = ["dask"]
"geo" = ["geopandas", "shapely", "pyproj"]
"testing" = ["pytest", "pandas.testing", "numpy.testing", "geopandas.tests", "geopandas.testing"]
Loading