diff --git a/.gitignore b/.gitignore index 199c7c6..f4f44bf 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,4 @@ data # PyTest .pytest_cache/ virtualenv +data/ diff --git a/planetutils/elevation_tile_download.py b/planetutils/elevation_tile_download.py index 0959087..b7629d4 100644 --- a/planetutils/elevation_tile_download.py +++ b/planetutils/elevation_tile_download.py @@ -7,9 +7,11 @@ from .bbox import load_features_csv, load_feature_string from .elevation_tile_downloader import ElevationGeotiffDownloader, ElevationSkadiDownloader + def main(): parser = argparse.ArgumentParser() parser.add_argument('--outpath', help='Output path for elevation tiles.', default='.') + parser.add_argument('--existpath', help='Directory to check for existing files.', default=None) parser.add_argument('--csv', help='Path to CSV file with bounding box definitions.') parser.add_argument('--bbox', help='Bounding box for extract file. Format for coordinates: left,bottom,right,top') parser.add_argument('--verbose', help="Verbose output", action='store_true') @@ -22,11 +24,11 @@ def main(): log.set_verbose() if args.format == 'geotiff': - p = ElevationGeotiffDownloader(args.outpath, zoom=args.zoom) + p = ElevationGeotiffDownloader(args.outpath, zoom=args.zoom, exist_path=args.existpath) elif args.format == 'skadi': p = ElevationSkadiDownloader(args.outpath) else: - print("Unknown format: %s"%args.format) + print("Unknown format: %s" % args.format) sys.exit(1) if args.csv: diff --git a/planetutils/elevation_tile_downloader.py b/planetutils/elevation_tile_downloader.py index c03613e..c5d6ecb 100644 --- a/planetutils/elevation_tile_downloader.py +++ b/planetutils/elevation_tile_downloader.py @@ -1,12 +1,19 @@ #!/usr/bin/env python from __future__ import absolute_import, unicode_literals + import os -import subprocess import math -from . import download -from . import log -from .bbox import validate_bbox +import urllib3 +from retry import retry +from concurrent import futures + +from urllib3 import Timeout + +from planetutils import download +from planetutils import log +from planetutils.bbox import validate_bbox + def makedirs(path): try: @@ -14,9 +21,15 @@ def makedirs(path): except OSError as e: pass + class ElevationDownloader(object): - def __init__(self, outpath='.'): + zoom = 0 + timeout = Timeout(connect=3.0, read=7.0) + http = urllib3.PoolManager(maxsize=50, timeout=timeout) + + def __init__(self, outpath='.', exist_path=None): self.outpath = outpath + self.exist_path = exist_path def download_planet(self): self.download_bbox([-180, -90, 180, 90]) @@ -24,36 +37,52 @@ def download_planet(self): def download_bboxes(self, bboxes): for name, bbox in bboxes.items(): self.download_bbox(bbox) - - def download_bbox(self, bbox, bucket='elevation-tiles-prod', prefix='geotiff'): + + def filter_needed(self, bbox): tiles = self.get_bbox_tiles(bbox) found = set() download = set() - for z,x,y in tiles: - od = self.tile_path(z, x, y) - op = os.path.join(self.outpath, *od) - if self.tile_exists(op): - found.add((x,y)) - else: - download.add((x,y)) - log.info("found %s tiles; %s to download"%(len(found), len(download))) - for x,y in sorted(download): - self.download_tile(bucket, prefix, z, x, y) + exist_dir = self.exist_path if self.exist_path else self.outpath + for root, dirs, files in os.walk(exist_dir): + path = root.split(os.sep) + for file in files: + if '.tif' in file: + found.add("%s/%s/%s" % (path[-2], path[-1], file.split('.')[0])) + for z, x, y in tiles: + if '%s/%s/%s' % (z, x, y) not in found: + download.add((x, y)) + log.info("found %s tiles; %s to download" % (len(found), len(download))) + return download + + def download_bbox(self, bbox, bucket='elevation-tiles-prod', prefix='geotiff'): + download = self.filter_needed(bbox) + tasks = {self._tile_url_path(bucket, prefix, self.zoom, x, y) for x, y in download} + + with futures.ThreadPoolExecutor() as executor: + # Start the load operations and mark each future with its URL + future_to_url = { + executor.submit(self._download_multi, url_op): url_op for url_op in tasks + } + for future in futures.as_completed(future_to_url): + try: + future.result() + except Exception as exc: + log.error('generated an exception: %s' % exc) + pass def tile_exists(self, op): if os.path.exists(op): return True - def download_tile(self, bucket, prefix, z, x, y, suffix=''): + def _tile_url_path(self, bucket, prefix, z, x, y, suffix=''): od = self.tile_path(z, x, y) op = os.path.join(self.outpath, *od) makedirs(os.path.join(self.outpath, *od[:-1])) if prefix: od = [prefix]+od - url = 'http://s3.amazonaws.com/%s/%s%s'%(bucket, '/'.join(od), suffix) - log.info("downloading %s to %s"%(url, op)) - self._download(url, op) - + url = 'http://s3.amazonaws.com/%s/%s%s' % (bucket, '/'.join(od), suffix) + return url, op + def tile_path(self, z, x, y): raise NotImplementedError @@ -63,6 +92,18 @@ def get_bbox_tiles(self, bbox): def _download(self, url, op): download.download(url, op) + @retry(exceptions=Exception, tries=5, delay=2, backoff=2, logger=log) + def _download_multi(self, url_op): + url, op = url_op + log.info("downloading %s to %s" % (url, op)) + request = self.http.request('GET', url) + with open(op, 'wb') as f: + try: + f.write(request.data) + except Exception as exc: + raise Exception("Error downloading %r - %s", (url, exc)) + + class ElevationGeotiffDownloader(ElevationDownloader): def __init__(self, *args, **kwargs): self.zoom = kwargs.pop('zoom', 0) @@ -80,7 +121,7 @@ def get_bbox_tiles(self, bbox): size = 2**self.zoom xt = lambda x:int((x + 180.0) / 360.0 * size) yt = lambda y:int((1.0 - math.log(math.tan(math.radians(y)) + (1 / math.cos(math.radians(y)))) / math.pi) / 2.0 * size) - tiles = [] + tiles = [] for x in range(xt(left), xt(right)+1): for y in range(yt(top), yt(bottom)+1): tiles.append([self.zoom, x, y]) @@ -89,9 +130,10 @@ def get_bbox_tiles(self, bbox): def tile_path(self, z, x, y): return list(map(str, [z, x, str(y)+'.tif'])) + class ElevationSkadiDownloader(ElevationDownloader): HGT_SIZE = (3601 * 3601 * 2) - + def get_bbox_tiles(self, bbox): left, bottom, right, top = validate_bbox(bbox) min_x = int(math.floor(left)) @@ -104,13 +146,13 @@ def get_bbox_tiles(self, bbox): for y in range(min_y, max_y): tiles.add((0, x, y)) return tiles - + def tile_exists(self, op): - if os.path.exists(op) and os.stat(op).st_size == self.HGT_SIZE: + if os.path.exists(op) and os.stat(op).st_size == self.HGT_SIZE: return True def download_tile(self, bucket, prefix, z, x, y, suffix=''): - super(ElevationSkadiDownloader, self).download_tile(bucket, 'skadi', z, x, y, suffix='.gz') + super(ElevationSkadiDownloader, self)._tile_url_path(bucket, 'skadi', z, x, y, suffix='.gz') def tile_path(self, z, x, y): ns = lambda i:'S%02d'%abs(i) if i < 0 else 'N%02d'%abs(i) diff --git a/planetutils/log.py b/planetutils/log.py index 89e7bde..337471f 100644 --- a/planetutils/log.py +++ b/planetutils/log.py @@ -2,18 +2,22 @@ logging.basicConfig(format='[%(levelname)s] %(message)s') logger = logging.getLogger(__name__) + def set_quiet(): logger.setLevel(logging.ERROR) + def set_verbose(): logger.setLevel(logging.DEBUG) + def set_default(): logger.setLevel(logging.INFO) + set_default() info = logger.info debug = logger.debug warning = logger.warning -error = logger.error \ No newline at end of file +error = logger.error diff --git a/setup.py b/setup.py index 1aa90dd..e70a7d3 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ author_email='ian@interline.io', license='MIT', packages=find_packages(exclude=['contrib', 'docs', 'tests']), - install_requires=['future', 'requests'], #, 'osmium', 'boto3' + install_requires=['future', 'requests', 'retry'], #, 'osmium', 'boto3', 'gdal' tests_require=['nose'], test_suite = 'nose.collector', entry_points={ @@ -28,6 +28,7 @@ 'osm_extract_download=planetutils.osm_extract_download:main', 'elevation_tile_download=planetutils.elevation_tile_download:main', 'elevation_tile_merge=planetutils.elevation_tile_merge:main', + 'elevation_tile_terrain=planetutils.elevation_tile_terrain:main', 'valhalla_tilepack_download=planetutils.tilepack_download:main', 'valhalla_tilepack_list=planetutils.tilepack_list:main' ],