diff --git a/README.md b/README.md index bb0cd1b..f296228 100644 --- a/README.md +++ b/README.md @@ -30,16 +30,18 @@ Install with pip: pip install open-buildings ``` -Now things may sorta work? I spent close to an hour battling this and it seems to install in the local repo directory, but not -in the venv / path for some reason. So theoretically you should be able to run `gob-tools benchmark 36b_buildings.csv test-output --format parquet` -from anywhere and have it work. Right now it's only working for me in the repo directory. I think the safest thing is to just do +This should add a CLI that you can then use. If it's working then: ```bash -python google-buildings-cli.py benchmark 36b_buildings.csv test-output --format parquet +open_buildings ``` -with the python file. Any help is more than welcome. Maybe next I'll try poetry? This python package management shit is really as bad as everyone says, even -ChatGPT wasn't able to get me there. +This should print out a help message. You should then be able to run the CLI: + + +```bash +open_buildings benchmark 36b_buildings.csv test-output-dir --format parquet +``` The only CSV files that will work are those from Google's Open Buildings dataset. @@ -67,30 +69,41 @@ A sample output for `benchmark`, run on 36b_buildings.csv, a 130 mb CSV file is: The full options can be found with `--help` after each command, and I'll put them here for reference: ``` -Usage: gob-tools convert [OPTIONS] INPUT_PATH OUTPUT_DIRECTORY +Usage: open_buildings convert [OPTIONS] INPUT_PATH OUTPUT_DIRECTORY + + Converts a CSV or a directory of CSV's to an alternate format. Input CSV's + are assumed to be from Google's Open Buildings Options: --format [fgb|parquet|gpkg|shp] - The output format. - --overwrite Whether to overwrite existing output files. - --process [duckdb|pandas|ogr] The processing method to use. + The output format. The default is FlatGeobuf (fgb) + --overwrite Whether to overwrite any existing output files. 
+ --process [duckdb|pandas|ogr] The processing method to use. The default is + pandas. --skip-split-multis Whether to keep multipolygons as they are - without splitting into their component - polygons. + without splitting into their component polygons. --verbose Whether to print detailed processing information. --help Show this message and exit. ``` ``` -Usage: gob-tools benchmark [OPTIONS] INPUT_PATH OUTPUT_DIRECTORY +Usage: open_buildings benchmark [OPTIONS] INPUT_PATH OUTPUT_DIRECTORY + + Runs the convert function on each of the supplied processes and formats, + printing the timing of each as a table Options: - --processes TEXT The processing methods to use. - --formats TEXT The output formats. + --processes TEXT The processing methods to use. One or more of duckdb, + pandas or ogr, in a comma-separated list. Default is + duckdb,pandas,ogr. + --formats TEXT The output formats to benchmark. One or more of fgb, + parquet, shp or gpkg, in a comma-separated list. + Default is fgb,parquet,shp,gpkg. --skip-split-multis Whether to keep multipolygons as they are without splitting into their component polygons. - --no-gpq Disable GPQ conversion. + --no-gpq Disable GPQ conversion. Timing will be faster, but not + valid GeoParquet (until DuckDB adds support) --verbose Whether to print detailed processing information. --output-format TEXT The format of the output. Options: ascii, csv, json. --help Show this message and exit. 
diff --git a/open_buildings/__init__.py b/open_buildings/__init__.py index 40cb2c3..0cb0b7c 100644 --- a/open_buildings/__init__.py +++ b/open_buildings/__init__.py @@ -2,6 +2,6 @@ __author__ = """Chris Holmes""" __email__ = 'chomie@gmail.com' -__version__ = '0.0.2' +__version__ = '0.0.4' from .open_buildings import * diff --git a/open_buildings/cli.py b/open_buildings/cli.py index e7eed4b..6bc4f9c 100644 --- a/open_buildings/cli.py +++ b/open_buildings/cli.py @@ -1,11 +1,11 @@ -"""Console script for open_buildings.""" +"""CLI to convert Google Open Building CSV files to alternate formats.""" import sys import click @click.group() def main(): - """Console script for open_buildings.""" + """CLI to convert Google Open Building CSV files to alternate formats.""" pass def handle_comma_separated(ctx, param, value): @@ -18,20 +18,20 @@ def handle_comma_separated(ctx, param, value): '--processes', callback=handle_comma_separated, default='duckdb,pandas,ogr', - help="The processing methods to use.", + help="The processing methods to use. One or more of duckdb, pandas or ogr, in a comma-separated list. Default is duckdb,pandas,ogr.", ) @click.option( '--formats', callback=handle_comma_separated, default='fgb,parquet,shp,gpkg', - help="The output formats.", + help="The output formats to benchmark. One or more of fgb, parquet, shp or gpkg, in a comma-separated list. Default is fgb,parquet,shp,gpkg.", ) @click.option( '--skip-split-multis', is_flag=True, help="Whether to keep multipolygons as they are without splitting into their component polygons.", ) -@click.option('--no-gpq', is_flag=True, help="Disable GPQ conversion.") +@click.option('--no-gpq', is_flag=True, help="Disable GPQ conversion. Timing will be faster, but not valid GeoParquet (until DuckDB adds support)") @click.option( '--verbose', is_flag=True, help="Whether to print detailed processing information." 
) @@ -50,6 +50,7 @@ def benchmark( verbose, output_format, ): + """Runs the convert function on each of the supplied processes and formats, printing the timing of each as a table""" results = process_benchmark( input_path, output_directory, processes, formats, not skip_split_multis, verbose ) @@ -75,16 +76,16 @@ def benchmark( '--format', type=click.Choice(['fgb', 'parquet', 'gpkg', 'shp']), default='fgb', - help="The output format.", + help="The output format. The default is FlatGeobuf (fgb)", ) @click.option( - '--overwrite', is_flag=True, help="Whether to overwrite existing output files." + '--overwrite', is_flag=True, help="Whether to overwrite any existing output files." ) @click.option( '--process', type=click.Choice(['duckdb', 'pandas', 'ogr']), default='pandas', - help="The processing method to use.", + help="The processing method to use. The default is pandas.", ) @click.option( '--skip-split-multis', @@ -97,6 +98,7 @@ def benchmark( def convert( input_path, output_directory, format, overwrite, process, skip_split_multis, verbose ): + """Converts a CSV or a directory of CSV's to an alternate format. 
Input CSV's are assumed to be from Google's Open Buildings""" process_geometries( input_path, output_directory, @@ -107,13 +109,5 @@ def convert( verbose, ) -@main.command() -@click.argument('building_id') -def info(building_id): - """Get information about a specific building.""" - click.echo(f"Getting information for building with ID: {building_id}") - click.echo("More info...") - # Add your logic to fetch building information here - if __name__ == "__main__": sys.exit(main()) # pragma: no cover diff --git a/setup.cfg b/setup.cfg index 4dd27e5..13e78ae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.2 +current_version = 0.0.4 commit = True tag = True diff --git a/setup.py b/setup.py index 84634f4..34089fb 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/opengeos/open-buildings', - version='0.0.2', + version='0.0.4', zip_safe=False, )