diff --git a/.gitignore b/.gitignore index 91d05c0..265b7ea 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ private/ *.py[cod] *$py.class +cache issues.txt # C extensions diff --git a/README.md b/README.md index dedebb2..fa37b94 100644 --- a/README.md +++ b/README.md @@ -45,13 +45,13 @@ Will print out a help message. You then will be able run the CLI (download [1.js ```bash -ob tools get_buildings 1.json my-buildings.geojson --country_iso RW +ob tools get_buildings 1.json --dst my-buildings.geojson --country_iso RW ``` You can also stream the json in directly in one line: ``` -curl https://data.source.coop/cholmes/aois/1.json | ob get_buildings - my-buildings.geojson --country_iso RW +curl https://data.source.coop/cholmes/aois/1.json | ob get_buildings - --dst my-buildings.geojson --country_iso RW ``` @@ -96,13 +96,17 @@ Usage: ob get_buildings [OPTIONS] [GEOJSON_INPUT] [DST] this tool we hope to eliminate the need to hint with the country_iso. Options: + --dst TEXT The path to write the output to. Can be a + directory or file. + --location TEXT Use city or region name instead of providing an + AOI as file. --source [google|overture] Dataset to query, defaults to Overture --country_iso TEXT A 2 character country ISO code to filter the data by. -s, --silent Suppress all print outputs. --overwrite Overwrite the destination file if it already exists. - --verbose Print detailed logs with timestamps. + -v, --verbose Print detailed logs with timestamps. --help Show this message and exit. ``` diff --git a/open_buildings/cli.py b/open_buildings/cli.py index d5c2122..8bb51b0 100644 --- a/open_buildings/cli.py +++ b/open_buildings/cli.py @@ -3,6 +3,8 @@ import click import json import pandas as pd +import osmnx +from shapely.geometry import shape, box, mapping import matplotlib.pyplot as plt from open_buildings.google.process import process_benchmark, process_geometries from open_buildings.download_buildings import download as download_buildings @@ -34,15 +36,22 @@ def overture(): def handle_comma_separated(ctx, param, value): return value.split(',') +def geocode(data: str): + location = osmnx.geocode_to_gdf(data) + geom = location.geometry[0] + geojson = json.loads(json.dumps({"type": "Feature", "geometry": mapping(geom)})) # turn geom tuple into list by (de-)serialising + return geojson + @main.command(name="get_buildings") @click.argument('geojson_input', type=click.File('r'), required=False) -@click.argument('dst', type=str, default="buildings.json") +@click.option('--dst', type=str, default="buildings.json", help='The path to write the output to. Can be a directory or file.') +@click.option('--location', type=str, default=None, help='Use city or region name instead of providing an AOI as file.') @click.option('--source', default="overture", type=click.Choice(['google', 'overture']), help='Dataset to query, defaults to Overture') @click.option('--country_iso', type=str, default=None, help='A 2 character country ISO code to filter the data by.') @click.option('-s', '--silent', is_flag=True, default=False, help='Suppress all print outputs.') @click.option('--overwrite', default=False, is_flag=True, help='Overwrite the destination file if it already exists.') -@click.option('--verbose', default=False, is_flag=True, help='Print detailed logs with timestamps.') -def get_buildings(geojson_input, dst, source, country_iso, silent, overwrite, verbose): +@click.option('-v', '--verbose', default=False, is_flag=True, help='Print detailed logs with timestamps.') +def get_buildings(geojson_input, dst, location, source, country_iso, silent, overwrite, verbose): """Tool to extract buildings in common geospatial formats from large archives of GeoParquet data online. GeoJSON input can be provided as a file or piped in from stdin. If no GeoJSON input is provided, the tool will read from stdin. @@ -71,6 +80,8 @@ def get_buildings(geojson_input, dst, source, country_iso, silent, overwrite, ve if geojson_input: geojson_data = json.load(geojson_input) + elif location: + geojson_data = geocode(location) else: geojson_data = json.load(click.get_text_stream('stdin')) diff --git a/open_buildings/download_buildings.py b/open_buildings/download_buildings.py index 610161d..e6ddc56 100644 --- a/open_buildings/download_buildings.py +++ b/open_buildings/download_buildings.py @@ -1,7 +1,7 @@ import json import click from math import tan, cos, log, pi -from shapely.geometry import shape +from shapely.geometry import shape, box, mapping from typing import Dict, Any, Union import mercantile import duckdb @@ -13,24 +13,15 @@ import pandas as pd import geopandas as gpd import subprocess -from shapely import wkb +import shapely +import geojson import shutil - +import osmnx from open_buildings.settings import Source, Format, settings def geojson_to_quadkey(data: dict) -> str: - if 'bbox' in data: - min_lon, min_lat, max_lon, max_lat = data['bbox'] - else: - coords = data['geometry']['coordinates'][0] - min_lon = min_lat = float('inf') - max_lon = max_lat = float('-inf') - - for lon, lat in coords: - min_lon = min(min_lon, lon) - min_lat = min(min_lat, lat) - max_lon = max(max_lon, lon) - max_lat = max(max_lat, lat) + geom = shape(data["geometry"]) + min_lon, min_lat, max_lon, max_lat = geom.bounds for zoom in range(12, -1, -1): tiles = list(mercantile.tiles(min_lon, min_lat, max_lon, max_lat, zooms=zoom)) @@ -79,7 +70,6 @@ def quadkey(geojson_input): geojson_data = json.load(geojson_input) else: geojson_data = json.load(click.get_text_stream('stdin')) - result = geojson_to_quadkey(geojson_data) click.echo(result) @@ -132,9 +122,8 @@ def quad2json(quadkey_input): result = quadkey_to_geojson(quadkey_input) click.echo(json.dumps(result, indent=2)) - def download( - geojson_data: Dict[str, Any], + geojson_data: Dict[str, Any], dst: Union[Path, str] = "buildings.json", source: Union[Source, str] = Source.OVERTURE, format: Optional[Union[Format, str]] = None, @@ -332,4 +321,4 @@ def print_elapsed_time(start_time): #cli.add_command(download) if __name__ == '__main__': - cli() \ No newline at end of file + cli() diff --git a/requirements.txt b/requirements.txt index 520f6c2..e581b60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ duckdb pandas geopandas pyogrio +osmnx shapely openlocationcode tabulate diff --git a/tests/test_open_buildings.py b/tests/test_open_buildings.py index 92365d9..56aa717 100644 --- a/tests/test_open_buildings.py +++ b/tests/test_open_buildings.py @@ -3,10 +3,12 @@ from pathlib import Path import os import json +from shapely.geometry import shape, box, mapping import re import subprocess from open_buildings.download_buildings import download, geojson_to_wkt, geojson_to_quadkey, quadkey_to_geojson +from open_buildings.cli import geocode from open_buildings.settings import Source, Format, settings ########################################################################### @@ -60,11 +62,17 @@ def test_geojson_to_wkt(aoi: Dict[str, Any]): def test_geojson_to_quadkey(aoi: Dict[str, Any]): """ Tests geojson_to_quadkey() using a pre-established true value. """ assert geojson_to_quadkey(aoi) == '301001330310' - + def test_quadkey_to_geojson(): """ Tests quadkey_to_geojson() using a pre-established true value. """ assert quadkey_to_geojson('031313131112') == {'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-0.17578125, 51.50874245880333], [-0.087890625, 51.50874245880333], [-0.087890625, 51.56341232867588], [-0.17578125, 51.56341232867588], [-0.17578125, 51.50874245880333]]]}} +def test_geocode(): + """ Tests geocode() using a pre-established true value. Verifies the bbox of the returned geometry. """ + geocoding_result = geocode('plymouth') + assert geocoding_result["type"] == "Feature" + assert shape(geocoding_result["geometry"]).bounds == (-4.2055324, 50.3327426, -4.0196056, 50.4441737) + @pytest.mark.integration @pytest.mark.flaky(reruns=NUM_RERUNS) @pytest.mark.parametrize("source", [s for s in Source]) @@ -145,7 +153,7 @@ def test_cli_get_buildings_from_file_to_directory(aoi: Dict[str, Any], tmp_path: input_path = tmp_path.joinpath("input.json") with open(input_path, "w") as f: json.dump(aoi, f) - subprocess.run(["ob", "get_buildings", str(input_path), str(tmp_path), "--country_iso", "SC"]) + subprocess.run(["ob", "get_buildings", str(input_path), "--dst", str(tmp_path), "--country_iso", "SC"], check=True) output_path = tmp_path.joinpath("buildings.json") # default file name assert os.path.exists(output_path) assert os.path.getsize(output_path) != 0 @@ -159,7 +167,7 @@ def test_cli_get_buildings_from_stdin_to_directory(aoi: Dict[str, Any], tmp_path Verifies that a log message with timestamp gets written to stdout. """ # we can't use pipes (e.g. f"echo {json.dumps(aoi)} | ...") in subprocess.run, instead we pass the json as stdin using the input/text arguments, - process = subprocess.run([ "ob", "get_buildings", "-", str(tmp_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True,check=True, capture_output=True) + process = subprocess.run([ "ob", "get_buildings", "-", "--dst", str(tmp_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True) dt_regex = re.compile(r"^\[[0-9]{4}(-[0-9]{2}){2} ([0-9]{2}:){2}[0-9]{2}\] ") # match timestamp format e.g. "[2023-10-18 19:08:24]" assert dt_regex.search(process.stdout) # ensure that stdout contains at least one timestamped message output_path = tmp_path.joinpath("buildings.json") # default file name @@ -175,7 +183,7 @@ def test_cli_get_buildings_from_stdin_to_file_silent(aoi: Dict[str, Any], tmp_pa """ output_path = tmp_path.joinpath("test123.json") # we can't use pipes (e.g. f"echo {json.dumps(aoi)} | ...") in subprocess.run, instead we pass the json as stdin using the input/text arguments, - process = subprocess.run(["ob", "get_buildings", "-", str(output_path), "--silent", "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True) + process = subprocess.run(["ob", "get_buildings", "-", "--dst", str(output_path), "--silent", "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True) assert process.stdout == "" # assert that nothing gets printed to stdout assert process.stderr == "" # assert that nothing gets printed to stdout assert os.path.exists(output_path) @@ -193,8 +201,31 @@ def test_cli_get_buildings_from_stdin_to_file_overwrite_false(aoi: Dict[str, Any with open(output_path, "w") as f: f.write("Foo bar") # we can't use pipes (e.g. f"echo {json.dumps(aoi)} | ...") in subprocess.run, instead we pass the json as stdin using the input/text arguments, - process = subprocess.run(["ob", "get_buildings", "-", str(output_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True) + process = subprocess.run(["ob", "get_buildings", "-", "--dst", str(output_path), "--country_iso", "SC"], input=json.dumps(aoi), text=True, check=True, capture_output=True) assert os.path.exists(output_path) with open(output_path, "r") as f: assert f.read() == "Foo bar" # verify that the file still has the same content as before - assert "exists" in process.stdout # verify that the user has been warned about the existing file \ No newline at end of file + assert "exists" in process.stdout # verify that the user has been warned about the existing file + +@pytest.mark.integration +@pytest.mark.flaky(reruns=NUM_RERUNS) +def test_cli_get_buildings_geocode(tmp_path: Path): + """ + Tests the geocoding functionality, implemented as the argument "location". + """ + output_path = tmp_path.joinpath("geocode_test.json") + subprocess.run(["ob", "get_buildings", "--dst", str(output_path), "--location", "oxford uk", "--country_iso", "GB"], check=True) + assert os.path.exists(output_path) + assert os.path.getsize(output_path) != 0 + +@pytest.mark.integration +@pytest.mark.flaky(reruns=NUM_RERUNS) +def test_cli_get_buildings_geocode_multipolygon(tmp_path: Path): + """ + Tests the geocoding functionality, implemented as the argument "location". Makes sure that a MultiPolygon geometry (the outline of Dubrovnik) + is simplified to a polygon (convex hull). + """ + output_path = tmp_path.joinpath("geocode_test.json") + subprocess.run(["ob", "get_buildings", "--dst", str(output_path), "--location", "dubrovnik", "--country_iso", "HR"], check=True) + assert os.path.exists(output_path) + assert os.path.getsize(output_path) != 0 \ No newline at end of file