From 7afef0aa4d72e76a7e2c3959c58e3b6dcae08645 Mon Sep 17 00:00:00 2001 From: iBug Date: Mon, 4 Dec 2023 00:48:04 +0800 Subject: [PATCH] github-release: Backport upstream version of do_download() --- github-release/tunasync/github-release.py | 48 +++++++++++++---------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/github-release/tunasync/github-release.py b/github-release/tunasync/github-release.py index 1953310..c9ad1a2 100644 --- a/github-release/tunasync/github-release.py +++ b/github-release/tunasync/github-release.py @@ -2,6 +2,7 @@ import os import sys +import tempfile import threading import traceback import queue @@ -74,25 +75,29 @@ def github_get(*args, **kwargs): return requests.get(*args, **kwargs) -def do_download(remote_url: str, dst_file: Path, remote_ts: float): - tmpfile = dst_file.parent / (dst_file.name + '.downloading') - try: - # NOTE the stream=True parameter below - with github_get(remote_url, stream=True) as r: - r.raise_for_status() - with open(tmpfile, 'wb') as f: +def do_download(remote_url: str, dst_file: Path, remote_ts: float, remote_size: int): + # NOTE the stream=True parameter below + with github_get(remote_url, stream=True) as r: + r.raise_for_status() + tmp_dst_file = None + try: + with tempfile.NamedTemporaryFile(prefix="." + dst_file.name + ".", suffix=".tmp", dir=dst_file.parent, delete=False) as f: + tmp_dst_file = Path(f.name) for chunk in r.iter_content(chunk_size=1024**2): if chunk: # filter out keep-alive new chunks f.write(chunk) # f.flush() - os.utime(tmpfile, (remote_ts, remote_ts)) - except Exception as e: - raise - else: - tmpfile.rename(dst_file) - finally: - if tmpfile.is_file(): - tmpfile.unlink() + # check for downloaded size + downloaded_size = tmp_dst_file.stat().st_size + if remote_size is not None and downloaded_size != remote_size: + raise ValueError(f'File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes') + os.utime(tmp_dst_file, (remote_ts, remote_ts)) + tmp_dst_file.chmod(0o644) + tmp_dst_file.replace(dst_file) + finally: + if tmp_dst_file is not None: + if tmp_dst_file.is_file(): + tmp_dst_file.unlink() def downloading_worker(q): @@ -101,7 +106,7 @@ def downloading_worker(q): if item is None: break - url, dst_file, working_dir, updated = item + url, dst_file, working_dir, updated, remote_size = item print("downloading", url, "to", dst_file.relative_to(working_dir), flush=True) @@ -159,12 +164,14 @@ def download(release, release_dir, tarball=False): release['published_at'], '%Y-%m-%dT%H:%M:%SZ').timestamp() dst_file = release_dir / 'repo-snapshot.tar.gz' remote_filelist.append(dst_file.relative_to(working_dir)) + # no size information, use None to skip size check + remote_size = None if dst_file.is_file(): print("skipping", dst_file.relative_to(working_dir), flush=True) else: dst_file.parent.mkdir(parents=True, exist_ok=True) - task_queue.put((url, dst_file, working_dir, updated)) + task_queue.put((url, dst_file, working_dir, updated, remote_size)) for asset in release['assets']: url = asset['browser_download_url'] @@ -172,7 +179,8 @@ def download(release, release_dir, tarball=False): asset['updated_at'], '%Y-%m-%dT%H:%M:%SZ').timestamp() dst_file = release_dir / ensure_safe_name(asset['name']) remote_filelist.append(dst_file.relative_to(working_dir)) - total_size += asset['size'] + remote_size = asset['size'] + total_size += remote_size if dst_file.is_file(): if args.fast_skip: @@ -186,14 +194,14 @@ def download(release, release_dir, tarball=False): # print(f"{local_filesize} vs {asset['size']}") # print(f"{local_mtime} vs {updated}") if local_mtime > updated or \ - asset['size'] == local_filesize and local_mtime == updated: + remote_size == local_filesize and local_mtime == updated: print("skipping", dst_file.relative_to( working_dir), flush=True) continue else: dst_file.parent.mkdir(parents=True, exist_ok=True) - task_queue.put((url, dst_file, working_dir, updated)) + task_queue.put((url, dst_file, working_dir, updated, remote_size)) def link_latest(name, repo_dir): try: