Skip to content

Commit

Permalink
Merge pull request #50 from mtravis/main
Browse files Browse the repository at this point in the history
added geocoding function
  • Loading branch information
cholmes authored Nov 20, 2023
2 parents f1703b2 + e545565 commit 276386b
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 31 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ private/
*.py[cod]
*$py.class

cache
issues.txt

# C extensions
Expand Down
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ Will print out a help message. You will then be able to run the CLI (download [1.js


```bash
ob tools get_buildings 1.json my-buildings.geojson --country_iso RW
ob tools get_buildings 1.json --dst my-buildings.geojson --country_iso RW
```

You can also stream the JSON directly in one line:

```
curl https://data.source.coop/cholmes/aois/1.json | ob get_buildings - my-buildings.geojson --country_iso RW
curl https://data.source.coop/cholmes/aois/1.json | ob get_buildings - --dst my-buildings.geojson --country_iso RW
```


Expand Down Expand Up @@ -96,13 +96,17 @@ Usage: ob get_buildings [OPTIONS] [GEOJSON_INPUT] [DST]
this tool we hope to eliminate the need to hint with the country_iso.
Options:
--dst TEXT The path to write the output to. Can be a
directory or file.
--location TEXT Use city or region name instead of providing an
AOI as file.
--source [google|overture] Dataset to query, defaults to Overture
--country_iso TEXT A 2 character country ISO code to filter the
data by.
-s, --silent Suppress all print outputs.
--overwrite Overwrite the destination file if it already
exists.
--verbose Print detailed logs with timestamps.
-v, --verbose Print detailed logs with timestamps.
--help Show this message and exit.
```

Expand Down
17 changes: 14 additions & 3 deletions open_buildings/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import click
import json
import pandas as pd
import osmnx
from shapely.geometry import shape, box, mapping
import matplotlib.pyplot as plt
from open_buildings.google.process import process_benchmark, process_geometries
from open_buildings.download_buildings import download as download_buildings
Expand Down Expand Up @@ -34,15 +36,22 @@ def overture():
def handle_comma_separated(ctx, param, value):
    """Split a comma-separated string into a list of its parts.

    The (ctx, param, value) signature matches click's option-callback
    convention; ctx and param are accepted but not used.
    """
    parts = value.split(',')
    return parts

def geocode(data: str):
    """Geocode a place name and return its outline as a GeoJSON Feature dict.

    Args:
        data: Free-text location query (e.g. a city or region name) that is
            passed to osmnx.geocode_to_gdf.

    Returns:
        A dict of the form {"type": "Feature", "geometry": {...}} built from
        the first geometry in the geocoding result.
    """
    matches = osmnx.geocode_to_gdf(data)
    outline = matches.geometry[0]
    feature = {"type": "Feature", "geometry": mapping(outline)}
    # mapping() yields coordinate tuples; a JSON round trip turns them into
    # lists so the result looks like GeoJSON parsed from a file.
    return json.loads(json.dumps(feature))

@main.command(name="get_buildings")
@click.argument('geojson_input', type=click.File('r'), required=False)
@click.argument('dst', type=str, default="buildings.json")
@click.option('--dst', type=str, default="buildings.json", help='The path to write the output to. Can be a directory or file.')
@click.option('--location', type=str, default=None, help='Use city or region name instead of providing an AOI as file.')
@click.option('--source', default="overture", type=click.Choice(['google', 'overture']), help='Dataset to query, defaults to Overture')
@click.option('--country_iso', type=str, default=None, help='A 2 character country ISO code to filter the data by.')
@click.option('-s', '--silent', is_flag=True, default=False, help='Suppress all print outputs.')
@click.option('--overwrite', default=False, is_flag=True, help='Overwrite the destination file if it already exists.')
@click.option('--verbose', default=False, is_flag=True, help='Print detailed logs with timestamps.')
def get_buildings(geojson_input, dst, source, country_iso, silent, overwrite, verbose):
@click.option('-v', '--verbose', default=False, is_flag=True, help='Print detailed logs with timestamps.')
def get_buildings(geojson_input, dst, location, source, country_iso, silent, overwrite, verbose):
"""Tool to extract buildings in common geospatial formats from large archives of GeoParquet data online. GeoJSON
input can be provided as a file or piped in from stdin. If no GeoJSON input is provided, the tool will read from stdin.
Expand Down Expand Up @@ -71,6 +80,8 @@ def get_buildings(geojson_input, dst, source, country_iso, silent, overwrite, ve

if geojson_input:
geojson_data = json.load(geojson_input)
elif location:
geojson_data = geocode(location)
else:
geojson_data = json.load(click.get_text_stream('stdin'))

Expand Down
27 changes: 8 additions & 19 deletions open_buildings/download_buildings.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import click
from math import tan, cos, log, pi
from shapely.geometry import shape
from shapely.geometry import shape, box, mapping
from typing import Dict, Any, Union
import mercantile
import duckdb
Expand All @@ -13,24 +13,15 @@
import pandas as pd
import geopandas as gpd
import subprocess
from shapely import wkb
import shapely
import geojson
import shutil

import osmnx
from open_buildings.settings import Source, Format, settings

def geojson_to_quadkey(data: dict) -> str:
if 'bbox' in data:
min_lon, min_lat, max_lon, max_lat = data['bbox']
else:
coords = data['geometry']['coordinates'][0]
min_lon = min_lat = float('inf')
max_lon = max_lat = float('-inf')

for lon, lat in coords:
min_lon = min(min_lon, lon)
min_lat = min(min_lat, lat)
max_lon = max(max_lon, lon)
max_lat = max(max_lat, lat)
geom = shape(data["geometry"])
min_lon, min_lat, max_lon, max_lat = geom.bounds

for zoom in range(12, -1, -1):
tiles = list(mercantile.tiles(min_lon, min_lat, max_lon, max_lat, zooms=zoom))
Expand Down Expand Up @@ -79,7 +70,6 @@ def quadkey(geojson_input):
geojson_data = json.load(geojson_input)
else:
geojson_data = json.load(click.get_text_stream('stdin'))

result = geojson_to_quadkey(geojson_data)
click.echo(result)

Expand Down Expand Up @@ -132,9 +122,8 @@ def quad2json(quadkey_input):
result = quadkey_to_geojson(quadkey_input)
click.echo(json.dumps(result, indent=2))


def download(
geojson_data: Dict[str, Any],
geojson_data: Dict[str, Any],
dst: Union[Path, str] = "buildings.json",
source: Union[Source, str] = Source.OVERTURE,
format: Optional[Union[Format, str]] = None,
Expand Down Expand Up @@ -332,4 +321,4 @@ def print_elapsed_time(start_time):
#cli.add_command(download)

if __name__ == '__main__':
cli()
cli()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ duckdb
pandas
geopandas
pyogrio
osmnx
shapely
openlocationcode
tabulate
Expand Down
43 changes: 37 additions & 6 deletions tests/test_open_buildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from pathlib import Path
import os
import json
from shapely.geometry import shape, box, mapping
import re
import subprocess

from open_buildings.download_buildings import download, geojson_to_wkt, geojson_to_quadkey, quadkey_to_geojson
from open_buildings.cli import geocode
from open_buildings.settings import Source, Format, settings

###########################################################################
Expand Down Expand Up @@ -60,11 +62,17 @@ def test_geojson_to_wkt(aoi: Dict[str, Any]):
def test_geojson_to_quadkey(aoi: Dict[str, Any]):
    """ Checks geojson_to_quadkey() against a known-good quadkey for the supplied aoi. """
    expected_quadkey = '301001330310'
    actual_quadkey = geojson_to_quadkey(aoi)
    assert actual_quadkey == expected_quadkey

def test_quadkey_to_geojson():
    """ Checks quadkey_to_geojson() against a known-good tile outline. """
    west, south = -0.17578125, 51.50874245880333
    east, north = -0.087890625, 51.56341232867588
    # Closed ring: starts and ends at the south-west corner.
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    expected = {
        'type': 'Feature',
        'geometry': {'type': 'Polygon', 'coordinates': [ring]},
    }
    assert quadkey_to_geojson('031313131112') == expected

def test_geocode():
    """ Tests geocode() using a pre-established true value. Verifies the bbox of the returned geometry. """
    geocoding_result = geocode('plymouth')
    assert geocoding_result["type"] == "Feature"
    expected_bounds = (-4.2055324, 50.3327426, -4.0196056, 50.4441737)
    # The outline comes from a live geocoding service, so its coordinates can
    # shift slightly when the upstream boundary is edited. Compare with a
    # tolerance (in degrees) instead of exact float equality.
    assert shape(geocoding_result["geometry"]).bounds == pytest.approx(expected_bounds, abs=1e-2)

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
@pytest.mark.parametrize("source", [s for s in Source])
Expand Down Expand Up @@ -145,7 +153,7 @@ def test_cli_get_buildings_from_file_to_directory(aoi: Dict[str, Any], tmp_path:
input_path = tmp_path.joinpath("input.json")
with open(input_path, "w") as f:
json.dump(aoi, f)
subprocess.run(["ob", "get_buildings", str(input_path), str(tmp_path), "--country_iso", "SC"])
subprocess.run(["ob", "get_buildings", str(input_path), "--dst", str(tmp_path), "--country_iso", "SC"], check=True)
output_path = tmp_path.joinpath("buildings.json") # default file name
assert os.path.exists(output_path)
assert os.path.getsize(output_path) != 0
Expand All @@ -159,7 +167,7 @@ def test_cli_get_buildings_from_stdin_to_directory(aoi: Dict[str, Any], tmp_path
Verifies that a log message with timestamp gets written to stdout.
"""
# we can't use pipes (e.g. f"echo {json.dumps(aoi)} | ...") in subprocess.run, instead we pass the json as stdin using the input/text arguments,
process = subprocess.run([ "ob", "get_buildings", "-", str(tmp_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True,check=True, capture_output=True)
process = subprocess.run([ "ob", "get_buildings", "-", "--dst", str(tmp_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True)
dt_regex = re.compile(r"^\[[0-9]{4}(-[0-9]{2}){2} ([0-9]{2}:){2}[0-9]{2}\] ") # match timestamp format e.g. "[2023-10-18 19:08:24]"
assert dt_regex.search(process.stdout) # ensure that stdout contains at least one timestamped message
output_path = tmp_path.joinpath("buildings.json") # default file name
Expand All @@ -175,7 +183,7 @@ def test_cli_get_buildings_from_stdin_to_file_silent(aoi: Dict[str, Any], tmp_pa
"""
output_path = tmp_path.joinpath("test123.json")
# we can't use pipes (e.g. f"echo {json.dumps(aoi)} | ...") in subprocess.run, instead we pass the json as stdin using the input/text arguments,
process = subprocess.run(["ob", "get_buildings", "-", str(output_path), "--silent", "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True)
process = subprocess.run(["ob", "get_buildings", "-", "--dst", str(output_path), "--silent", "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True)
assert process.stdout == "" # assert that nothing gets printed to stdout
assert process.stderr == "" # assert that nothing gets printed to stdout
assert os.path.exists(output_path)
Expand All @@ -193,8 +201,31 @@ def test_cli_get_buildings_from_stdin_to_file_overwrite_false(aoi: Dict[str, Any
with open(output_path, "w") as f:
f.write("Foo bar")
# we can't use pipes (e.g. f"echo {json.dumps(aoi)} | ...") in subprocess.run, instead we pass the json as stdin using the input/text arguments,
process = subprocess.run(["ob", "get_buildings", "-", str(output_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True)
process = subprocess.run(["ob", "get_buildings", "-", "--dst", str(output_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True)
assert os.path.exists(output_path)
with open(output_path, "r") as f:
assert f.read() == "Foo bar" # verify that the file still has the same content as before
assert "exists" in process.stdout # verify that the user has been warned about the existing file
assert "exists" in process.stdout # verify that the user has been warned about the existing file

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_geocode(tmp_path: Path):
    """
    Runs "ob get_buildings" with --location instead of an AOI file and checks that a non-empty output file is written.
    """
    output_path = tmp_path.joinpath("geocode_test.json")
    command = [
        "ob", "get_buildings",
        "--dst", str(output_path),
        "--location", "oxford uk",
        "--country_iso", "GB",
    ]
    # check=True makes a non-zero exit code of the CLI fail the test.
    subprocess.run(command, check=True)
    assert os.path.exists(output_path)
    assert os.path.getsize(output_path) != 0

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_geocode_multipolygon(tmp_path: Path):
    """
    Runs "ob get_buildings" with --location set to "dubrovnik" and checks that a non-empty output file is written.
    NOTE(review): per the original author, Dubrovnik geocodes to a MultiPolygon that should be simplified to a
    polygon (convex hull); that step is not asserted here - only the existence and size of the output are checked.
    """
    output_path = tmp_path.joinpath("geocode_test.json")
    command = [
        "ob", "get_buildings",
        "--dst", str(output_path),
        "--location", "dubrovnik",
        "--country_iso", "HR",
    ]
    # check=True makes a non-zero exit code of the CLI fail the test.
    subprocess.run(command, check=True)
    assert os.path.exists(output_path)
    assert os.path.getsize(output_path) != 0

0 comments on commit 276386b

Please sign in to comment.