Skip to content

Commit

Permalink
Better UX for downloading many files
Browse files Browse the repository at this point in the history
  • Loading branch information
jennydaman committed Mar 13, 2022
1 parent 211ae11 commit fd5f983
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 10 deletions.
30 changes: 27 additions & 3 deletions caw/movedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from chris.client import ChrisClient
from chris.cube.files import DownloadableFile
from chris.types import CUBEUrl
from chris.cube.pagination import TooMuchPaginationException, MAX_REQUESTS, REQUESTS_ENV_VAR_NAME
import logging
from pathlib import Path

Expand Down Expand Up @@ -83,15 +84,15 @@ def __calculate_target(remote_file: DownloadableFile) -> Tuple[Path, Downloadabl
os.makedirs(target.parent, exist_ok=True)
return target, remote_file

search = tuple(client.get_files(url))
with typer.progressbar(search, length=len(search), label='Getting information', file=sys.stderr) as progress:
files_to_download = _discover_files_to_download(client, url)

with typer.progressbar(files_to_download, length=len(files_to_download), label='Getting information', file=sys.stderr) as progress:
to_download = frozenset(__calculate_target(remote_file) for remote_file in progress)

with typer.progressbar(length=len(to_download), label='Downloading files', file=sys.stderr) as progress:
def download_file(t: Tuple[Path, DownloadableFile]) -> int:
"""
Download file and move the progress bar
:param t: tuple
:return: downloaded file size
"""
target, remote_file = t
Expand All @@ -110,3 +111,26 @@ def download_file(t: Tuple[Path, DownloadableFile]) -> int:
else:
size = f'{total_size / 1e9:.4f} GB'
typer.secho(size, fg=typer.colors.GREEN, err=True)


def _discover_files_to_download(client: ChrisClient, url: CUBEUrl) -> Tuple[DownloadableFile, ...]:
    """
    Collect every file found under ``url``, printing a running count while searching.

    :param client: client whose ``get_files`` method performs the search
    :param url: URL given to ``client.get_files``
    :return: tuple of all discovered files
    :raises typer.Abort: if the search raises ``TooMuchPaginationException``;
                         a hint on how to raise the limit is printed first
    """
    import shlex  # local import: only needed to build the hint in the error path

    typer.echo('Discovering files... ', nl=False)
    total = 0

    def report_discovered_file(f: DownloadableFile) -> DownloadableFile:
        # Pass-through which rewrites the counter in place (leading '\r') per file.
        nonlocal total
        total += 1
        typer.echo(f'\rDiscovering files... {total}', nl=False)
        return f

    try:
        search = tuple(report_discovered_file(f) for f in client.get_files(url))
    except TooMuchPaginationException as e:
        # Rebuild the current invocation with a larger limit. Each argument is
        # shell-quoted so the suggestion can be copy-pasted even when an
        # argument contains spaces or other special characters.
        suggestion = shlex.join(
            ['env', f'{REQUESTS_ENV_VAR_NAME}={MAX_REQUESTS + 99900}', *sys.argv]
        )
        # The progress line above was written without a trailing newline;
        # finish it so the explanation starts on a line of its own.
        typer.echo()
        typer.echo(
            f'Number of paginated requests exceeded {MAX_REQUESTS}. '
            "If you're trying to download many files, you can increase the limit:"
            f'\n\n\t {suggestion}\n'
        )
        raise typer.Abort() from e
    typer.echo(f'\rFound {total} files to download.')
    return search
8 changes: 4 additions & 4 deletions chris/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from chris.cube.plugin import Plugin
from chris.cube.plugin_instance import PluginInstance
from chris.cube.files import ListOfDownloadableFiles
from chris.cube.files import DownloadableFilesGenerator
from chris.cube.registered_pipeline import RegisteredPipeline
from chris.cube.pagination import fetch_paginated_objects
from chris.cube.resource import ConnectedResource
Expand Down Expand Up @@ -178,13 +178,13 @@ def run(self, plugin_name='', plugin_url='', plugin: Optional[PluginInstance] =
plugin = self.get_plugin(name_exact=plugin_name, url=plugin_url)
return plugin.create_instance(params)

def search_uploadedfiles(self, fname='', fname_exact='') -> DownloadableFilesGenerator:
    """
    Search the ``uploadedfiles`` collection.

    :param fname: forwarded to ``_join_qs`` as ``fname``
    :param fname_exact: forwarded to ``_join_qs`` as ``fname_exact``
    :return: result of ``get_files`` for the search URL
    """
    qs = self._join_qs(fname=fname, fname_exact=fname_exact)
    # The search endpoint is the collection link followed by "search/?<query string>".
    search_url = CUBEUrl(f"{self.collection_links['uploadedfiles']}search/?{qs}")
    return self.get_files(search_url)

def get_files(self, url: CUBEUrl) -> DownloadableFilesGenerator:
    """
    Wrap ``url`` in a ``DownloadableFilesGenerator`` which shares this client's ``s``.

    :param url: URL of the files to iterate over
    :return: iterable of the files at ``url``
    """
    files = DownloadableFilesGenerator(url=url, s=self.s)
    return files

def search_pipelines(self, name='') -> Generator[RegisteredPipeline, None, None]:
return fetch_paginated_objects(
Expand Down
2 changes: 1 addition & 1 deletion chris/cube/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def download(self, destination: Path, chunk_size=8192):


@dataclass(frozen=True)
class ListOfDownloadableFiles(Iterable[DownloadableFile], CUBEResource):
class DownloadableFilesGenerator(Iterable[DownloadableFile], CUBEResource):
def __iter__(self) -> Generator[DownloadableFile, None, None]:
return fetch_paginated_objects(s=self.s, url=self.url, constructor=self._construct_downloadable_file)

Expand Down
6 changes: 5 additions & 1 deletion chris/cube/pagination.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import sys
import json
from typing import Generator, Any, TypedDict, Callable, TypeVar, List, Dict

Expand All @@ -9,6 +11,8 @@
import logging

logger = logging.getLogger(__name__)
REQUESTS_ENV_VAR_NAME = 'CAW_PAGINATION_MAX_REQUESTS'
MAX_REQUESTS = int(os.getenv(REQUESTS_ENV_VAR_NAME, 100))


class UnrecognizedResponseException(Exception):
Expand All @@ -32,7 +36,7 @@ class JSONPaginatedResponse(TypedDict):
def fetch_paginated_objects(s: requests.Session,
                            url: CUBEUrl,
                            constructor: Callable[..., T],
                            max_requests=MAX_REQUESTS
                            ) -> Generator[T, None, None]:
    """
    Yield one constructed object per item produced by ``fetch_paginated_raw``.

    :param s: session, passed to ``fetch_paginated_raw`` and to every ``constructor`` call
    :param url: URL passed to ``fetch_paginated_raw``
    :param constructor: called as ``constructor(s=s, **d)`` for each item ``d``.
                        Required: it was previously written ``constructor=Callable[..., T]``,
                        which made the typing object itself the default value, so
                        omitting the argument could never have worked.
    :param max_requests: limit passed on to ``fetch_paginated_raw``
    :return: generator of whatever ``constructor`` returns
    """
    for d in fetch_paginated_raw(s, url, max_requests):
        yield constructor(s=s, **d)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='caw',
version='0.6.0',
version='0.6.1a1',
packages=find_packages(exclude=('*.tests',)),
url='https://github.com/FNNDSC/caw',
license='MIT',
Expand Down

0 comments on commit fd5f983

Please sign in to comment.