-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: add from_wkt/to_wkt functions #50
Changes from 13 commits
ba1a631
fa81588
5ed5d6c
af062a7
00e7d6e
2040dba
dcc82d7
de19f0e
1c785bf
3592716
6825e27
02f5b60
6a5ba23
4cfff46
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
namespace spherely {

/// Earth radius in meters (6371.01 km).
///
/// Declared `constexpr` so the value is guaranteed to be initialized at
/// compile time and can be used in constant expressions (e.g. in
/// `static_assert` or to initialize other `constexpr` values).
constexpr double EARTH_RADIUS_METERS = 6371.01 * 1000;

}  // namespace spherely
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#include <s2/s1angle.h> | ||
#include <s2geography.h> | ||
|
||
#include "constants.hpp" | ||
#include "creation.hpp" | ||
#include "geography.hpp" | ||
#include "pybind11.hpp" | ||
|
||
namespace py = pybind11; | ||
namespace s2geog = s2geography; | ||
using namespace spherely; | ||
|
||
class FromWKT { | ||
public: | ||
FromWKT(bool oriented, bool planar, float tessellate_tol_m = 100.0) { | ||
#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \ | ||
(S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2) | ||
s2geog::geoarrow::ImportOptions options; | ||
options.set_oriented(oriented); | ||
if (planar) { | ||
auto tol = S1Angle::Radians(tessellate_tol_m / EARTH_RADIUS_METERS); | ||
options.set_tessellate_tolerance(tol); | ||
} | ||
m_reader = std::make_shared<s2geog::WKTReader>(options); | ||
#else | ||
if (planar || oriented) { | ||
throw std::invalid_argument( | ||
"planar and oriented options are only available with s2geography >= 0.2"); | ||
} | ||
m_reader = std::make_shared<s2geog::WKTReader>(); | ||
#endif | ||
} | ||
|
||
PyObjectGeography operator()(py::str a) const { | ||
return make_py_geography(m_reader->read_feature(a)); | ||
} | ||
|
||
private: | ||
std::shared_ptr<s2geog::WKTReader> m_reader; | ||
}; | ||
|
||
/// Writes the geography wrapped by `a` as WKT and returns it as a Python
/// string.
py::str to_wkt(PyObjectGeography a) {
    s2geog::WKTWriter wkt_writer;
    return py::str(wkt_writer.write_feature(a.as_geog_ptr()->geog()));
}
|
||
/// Registers the WKT I/O functions (`from_wkt` and `to_wkt`) on module `m`.
void init_io(py::module& m) {
    m.def(
        "from_wkt",
        // The scalar keyword arguments configure a single FromWKT reader,
        // which is then vectorized over the array of WKT strings.
        [](py::array_t<py::str> a, bool oriented, bool planar, float tessellate_tol_m) {
            return py::vectorize(FromWKT(oriented, planar, tessellate_tol_m))(std::move(a));
        },
        py::arg("a"),
        py::arg("oriented") = false,
        py::arg("planar") = false,
        py::arg("tessellate_tol_m") = 100.0,
        R"pbdoc(
        Creates geographies from the Well-Known Text (WKT) representation.

        Parameters
        ----------
        a : str or array_like
            WKT strings.
        oriented : bool, default False
            Set to True if polygon ring directions are known to be correct
            (i.e., exterior rings are defined counter clockwise and interior
            rings are defined clockwise).
            By default (False), it will return the polygon with the smaller
            area.
        planar : bool, default False
            If set to True, the edges of linestrings and polygons are assumed
            to be linear on the plane. In that case, additional points will
            be added to the line while creating the geography objects, to
            ensure every point is within `tessellate_tol_m` meters of the
            original line.
            By default (False), it is assumed that the edges are spherical
            (i.e. represent the shortest path on the sphere between two points).
        tessellate_tol_m : float, default 100.0
            The maximum distance in meters that a point must be moved to
            satisfy the planar edge constraint. This is only used if `planar`
            is set to True.

    )pbdoc");

    m.def("to_wkt",
          py::vectorize(&to_wkt),
          py::arg("a"),
          R"pbdoc(
        Returns the WKT representation of each geography.

        Parameters
        ----------
        a : :py:class:`Geography` or array_like
            Geography object(s)

    )pbdoc");
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,8 @@ from typing import ( | |
import numpy as np | ||
import numpy.typing as npt | ||
|
||
__version__: str = ... | ||
__s2geography_version__: str = ... | ||
EARTH_RADIUS_METERS: float = ... | ||
|
||
class Geography: | ||
|
@@ -183,3 +185,14 @@ convex_hull: _VFunc_Nin1_Nout1[ | |
Literal["convex_hull"], PolygonGeography, PolygonGeography | ||
] | ||
distance: _VFunc_Nin2optradius_Nout1[Literal["distance"], float, float] | ||
|
||
# io functions | ||
|
||
to_wkt: _VFunc_Nin1_Nout1[Literal["to_wkt"], str, object] | ||
benbovy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def from_wkt( | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically this is also a fully vectorized function, i.e., […] Or do we want to restrict […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Used a lambda in the […] |
||
a: Iterable[str], | ||
oriented: bool = False, | ||
planar: bool = False, | ||
tessellate_tol_m: float = 100.0, | ||
) -> npt.NDArray[Any]: ... |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import numpy as np | ||
import pytest | ||
from packaging.version import Version | ||
|
||
import spherely | ||
|
||
|
||
def test_from_wkt():
    """from_wkt accepts a list, an object-dtype array and a str-dtype array."""
    wkt_points = ["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]
    expected = spherely.points([1, 2, 3], [1, 2, 3])

    # object equality does not yet work, so spherely.equals is used instead of
    # np.testing.assert_array_equal(result, expected)
    from_list = spherely.from_wkt(wkt_points)
    assert spherely.equals(from_list, expected).all()

    # from explicit object dtype
    from_object_array = spherely.from_wkt(np.array(wkt_points, dtype=object))
    assert spherely.equals(from_object_array, expected).all()

    # from numpy string dtype
    from_str_array = spherely.from_wkt(np.array(wkt_points, dtype="U"))
    assert spherely.equals(from_str_array, expected).all()
|
||
|
||
def test_from_wkt_invalid():
    """Malformed WKT currently surfaces as a RuntimeError."""
    malformed_wkt = ["POINT (1)"]
    # TODO can we provide better error type?
    with pytest.raises(RuntimeError):
        spherely.from_wkt(malformed_wkt)
|
||
|
||
def test_from_wkt_wrong_type():
    """Non-string elements are rejected with a TypeError."""
    integer_input = [1]
    with pytest.raises(TypeError, match="expected bytes, int found"):
        spherely.from_wkt(integer_input)

    # TODO support missing values
    input_with_missing = ["POINT (1 1)", None]
    with pytest.raises(TypeError, match="expected bytes, NoneType found"):
        spherely.from_wkt(input_with_missing)
|
||
|
||
# Polygon fixture shared by the `oriented` keyword tests: an exterior ring
# followed by one hole ring.
polygon_with_bad_hole_wkt = "POLYGON ({shell},{hole})".format(
    shell="(20 35, 10 30, 10 10, 30 5, 45 20, 20 35)",
    hole="(30 20, 20 25, 20 15, 30 20)",
)
|
||
|
||
@pytest.mark.skipif(
    Version(spherely.__s2geography_version__) < Version("0.2.0"),
    reason="Needs s2geography >= 0.2.0",
)
def test_from_wkt_oriented():
    """The hole ring is re-oriented by default and rejected with oriented=True."""
    reoriented_wkt = (
        "POLYGON ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), "
        "(20 15, 20 25, 30 20, 20 15))"
    )

    # by default re-orients the inner ring
    geog = spherely.from_wkt(polygon_with_bad_hole_wkt)
    assert str(geog) == reoriented_wkt

    # if we force to not orient, we get an error
    with pytest.raises(RuntimeError, match="Inconsistent loop orientations detected"):
        spherely.from_wkt(polygon_with_bad_hole_wkt, oriented=True)
Comment on lines
+62
to
+64
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Out of curiosity, is there a great performance gain when setting […]? Maybe are there other reasons of setting […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We might also later uniformize across the library the type of error raised (in #51 a ValueError is raised when a polygon has invalid edges).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know (without trying out) if there is a performance difference, but I think that the […] And I can also imagine that just checking that the orientation of all loops is the same is still cheaper than a full normalization (but that is indeed something to check).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, in this case the error comes from s2geography (in theory I could catch the error and re-throw it with a different class, because those RuntimeErrors are a bit strange) |
||
|
||
|
||
@pytest.mark.skipif(
    Version(spherely.__s2geography_version__) < Version("0.2.0"),
    reason="Needs s2geography >= 0.2.0",
)
def test_from_wkt_planar():
    """planar=True keeps the line within the tolerance of a point at latitude 45."""
    line_wkt = "LINESTRING (-64 45, 0 45)"

    # default: spherical edges
    spherical = spherely.from_wkt(line_wkt)
    assert spherely.distance(spherical, spherely.point(-30.1, 45)) > 10000

    # planar edges with the default tolerance
    planar_default = spherely.from_wkt(line_wkt, planar=True)
    assert spherely.distance(planar_default, spherely.point(-30.1, 45)) < 100

    # planar edges with an explicit, tighter tolerance
    planar_tight = spherely.from_wkt(line_wkt, planar=True, tessellate_tol_m=10)
    assert spherely.distance(planar_tight, spherely.point(-30.1, 45)) < 10
|
||
|
||
@pytest.mark.skipif(
    Version(spherely.__s2geography_version__) >= Version("0.2.0"),
    reason="Needs s2geography < 0.2.0",
)
def test_from_wkt_unsupported_keywords():
    """With s2geography < 0.2.0, `oriented` and `planar` raise ValueError."""
    unsupported_calls = (
        (polygon_with_bad_hole_wkt, {"oriented": True}),
        ("LINESTRING (-64 45, 0 45)", {"planar": True}),
    )
    for wkt, keyword in unsupported_calls:
        with pytest.raises(ValueError):
            spherely.from_wkt(wkt, **keyword)
|
||
|
||
def test_to_wkt():
    """to_wkt returns an object array holding one WKT string per geography."""
    coords = [1.1, 2, 3]
    wkt_strings = spherely.to_wkt(spherely.points(coords, coords))
    np.testing.assert_array_equal(
        wkt_strings,
        np.array(["POINT (1.1 1.1)", "POINT (2 2)", "POINT (3 3)"], dtype=object),
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(nit suggestion: `_m` makes the parameter name slightly less readable IMHO and its meaning may not be obvious at a first glance without checking the docs anyway... `_meters` would be more meaningful but would also make the parameter name too long).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That keyword name was taken from R s2 (https://r-spatial.github.io/s2/reference/s2_geog_point.html), but I am certainly fine with dropping the `_m` as well. I agree that from just seeing the keyword name (without reading the explanation of what it stands for), it is not necessarily clear that this stands for "meters", making it an unnecessary suffix.
(To be honest, I am also not convinced that "tessellation" is the best name. BigQuery uses it, but if you google for it, it is typically about dividing a plane into multiple shapes that nicely cover it without overlap, e.g. https://en.wikipedia.org/wiki/Edge_tessellation and https://en.wikipedia.org/wiki/Tessellation)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes I'm more familiar with the latter meaning (dividing a surface into multiple shapes), but I'm fine with the name here.
Another name could be `densify_pts_tol` or `densify_tol` (pyproj uses densify_pts for similar problems).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that in the pyproj case it is (I assume) a uniform/regular densification, while s2 does a non-uniform one (only adding points where needed to preserve the tolerance).
I renamed the keyword to `tessellate_tolerance` (making it a bit longer, but also more readable I think than the "tol", and then it is also consistent with the C++ argument).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Going to merge this, so I can fixup conflicts with the geoarrow PR. But happy to change the keyword name again in a quick follow-up.