Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance CLI interface #15

Merged
merged 1 commit into from
Sep 12, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 84 additions & 23 deletions unzip-http
Original file line number Diff line number Diff line change
@@ -1,20 +1,35 @@
#!/usr/bin/env python3

'''
Usage:
unzip-http <url.zip> <filenames..>
"""
usage: unzip-http [-h] [-l] [-f] [-o] url [files ...]

Extract <filenames> from a remote .zip at <url> to stdout.
If no filenames given, displays .zip contents (filenames and sizes).
Each filename can be a wildcard glob; all matching files are concatenated and sent to stdout in zipfile order.
Extract individual files from .zip files over http without downloading the
entire archive. HTTP server must send `Accept-Ranges: bytes` and
`Content-Length` in headers.

HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.
'''
positional arguments:
url URL of the remote zip file
files Files to extract. If no filenames given, displays .zip
contents (filenames and sizes). Each filename can be a
wildcard glob.

options:
-h, --help show this help message and exit
-l, --list List files in the remote zip file
-f, --full-filepaths Recreate folder structure from zip file when extracting
(instead of extracting the files to the current
directory)
-o, --stdout Write files to stdout (if multiple files: concatenate
them to stdout, in zipfile order)
"""

import sys
import io
import math
import time
import fnmatch
import argparse
import pathlib

import unzip_http

Expand Down Expand Up @@ -44,19 +59,65 @@ class StreamProgress:
return r


def main(url, *globs):
rzf = unzip_http.RemoteZipFile(url)
def list_files(rzf):
def safelog(x):
return 1 if x == 0 else math.ceil(math.log10(x))

digits_compr = max(safelog(f.compress_size) for f in rzf.infolist())
digits_plain = max(safelog(f.file_size ) for f in rzf.infolist())
fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s'
for f in rzf.infolist():
if not globs:
print(f'{f.compress_size} -> {f.file_size} {f.filename}')
elif any(fnmatch.fnmatch(f.filename, g) for g in globs):
fp = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)
while r := fp.read(2**18):
sys.stdout.buffer.write(r)


args = sys.argv[1:]
if not args:
print(__doc__, file=sys.stderr)
else:
main(*args)
print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr)


def extract_one(outfile, rzf, f, ofname):
    """Copy one archive member into an already-open binary sink.

    outfile: object with a ``write`` method that receives each chunk.
    rzf: remote zip object; ``rzf.open(f)`` supplies the member's data.
    f: archive entry; its ``filename`` and ``compress_size`` are read.
    ofname: destination label, used only in the status message.

    A status line is printed to stderr before copying starts.
    """
    print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr)

    chunk_size = 2**18  # 256 KiB per read
    stream = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)
    while True:
        chunk = stream.read(chunk_size)
        if not chunk:
            # An empty/falsy read marks the end of the member.
            break
        outfile.write(chunk)


def download_file(f, rzf, args):
    """Extract archive member *f* if it matches one of the requested globs.

    f: archive entry; only its ``filename`` is read here.
    rzf: remote zip object, passed through to ``extract_one``.
    args: parsed command-line options; reads ``files`` (glob patterns),
        ``stdout`` and ``full_filepaths``.

    Returns None in every case. Entries are skipped (nothing is written)
    when they match no glob, when they are directory entries, or when their
    name would resolve to a location outside the current directory.
    """
    if not any(fnmatch.fnmatch(f.filename, g) for g in args.files):
        return

    # Directory entries carry no data. Opening one for writing would create
    # a bogus empty file (flat mode) or fail on an existing directory.
    if f.filename.endswith('/'):
        return

    if args.stdout:
        extract_one(sys.stdout.buffer, rzf, f, "stdout")
        return

    path = pathlib.Path(f.filename)
    if args.full_filepaths:
        # Member names come from the remote archive and are untrusted:
        # refuse absolute paths and parent-directory references so nothing
        # can be written outside the current directory.
        if path.anchor or '..' in path.parts:
            print(f'Skipping {f.filename}: unsafe path', file=sys.stderr)
            return
        path.parent.mkdir(parents=True, exist_ok=True)
        target = str(path)
    else:
        # Flat mode keeps only the final path component.
        target = path.name
        if target in ('', '..'):
            print(f'Skipping {f.filename}: unsafe path', file=sys.stderr)
            return

    with open(target, 'wb') as of:
        extract_one(of, rzf, f, target)


def main(args):
    """Open the remote zip named on the command line and list or extract.

    args: parsed command-line options; reads ``url`` (a one-element list),
        ``list`` and ``files``.

    The archive contents are listed when ``--list`` is set or no file
    patterns were given; otherwise every entry is offered to
    ``download_file``, which does the glob matching.
    """
    rzf = unzip_http.RemoteZipFile(args.url[0])

    listing_requested = args.list or len(args.files) == 0
    if listing_requested:
        list_files(rzf)
        return

    for entry in rzf.infolist():
        download_file(entry, rzf, args)


if __name__ == '__main__':
    # Script entry point: declare the command-line interface, parse argv,
    # and hand the parsed options to main().
    parser = argparse.ArgumentParser(
        prog='unzip-http',
        description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.",
    )

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        ('-l', '--list',
         "List files in the remote zip file"),
        ('-f', '--full-filepaths',
         "Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)"),
        ('-o', '--stdout',
         "Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)"),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true',
                            default=False, help=help_text)

    # Positionals: nargs=1 makes `url` a one-element list, which main() indexes.
    parser.add_argument("url", nargs=1, help="URL of the remote zip file")
    parser.add_argument(
        "files",
        nargs='*',
        help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.",
    )

    args = parser.parse_args()
    main(args)

Loading