From 0bec986a3e26bc4b1be4665061d6e26f4cf0a070 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Fri, 31 May 2024 00:44:09 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20allow=20ban=20resource=20mi?= =?UTF-8?q?rror=20via=20`--banned-mirrors-pattern`=20option=20(#272)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 7 +++++++ yutto/__main__.py | 2 ++ yutto/_typing.py | 1 + yutto/processor/downloader.py | 35 +++++++++++++++++++++++++++++++++-- yutto/utils/fetcher.py | 4 +++- 5 files changed, 46 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1834d475d..d6ebf05ef 100644 --- a/README.md +++ b/README.md @@ -468,6 +468,13 @@ cat ~/.yutto_alias | yutto tensura-nikki --batch --alias-file - 设置两话之间的下载间隔(单位为秒),避免短时间內下载大量视频导致账号被封禁 +#### 禁用下载镜像 + +- 参数 `--banned-mirrors-pattern` +- 默认值 `None` + +使用正则禁用特定镜像,比如 `--banned-mirrors-pattern "mirrorali"` 将禁用 url 中包含 `mirrorali` 的镜像 + #### 不显示颜色 - 参数 `--no-color` diff --git a/yutto/__main__.py b/yutto/__main__.py index 22192490b..c9810eb6a 100644 --- a/yutto/__main__.py +++ b/yutto/__main__.py @@ -133,6 +133,7 @@ def cli() -> argparse.ArgumentParser: "--metadata-format-premiered", default=TIME_DATE_FMT, help="专用于 metadata 文件中 premiered 字段的日期格式" ) group_common.add_argument("--download-interval", default=0, type=int, help="设置下载间隔,单位为秒") + group_common.add_argument("--banned-mirrors-pattern", default=None, help="禁用下载链接的镜像源,使用正则匹配") # 资源选择 group_common.add_argument( @@ -349,6 +350,7 @@ async def run(args_list: list[argparse.Namespace]): "premiered": args.metadata_format_premiered, "dateadded": TIME_FULL_FMT, }, + "banned_mirrors_pattern": args.banned_mirrors_pattern, }, ) Logger.new_line() diff --git a/yutto/_typing.py b/yutto/_typing.py index 3a39b3b40..cc3074aeb 100644 --- a/yutto/_typing.py +++ b/yutto/_typing.py @@ -193,6 +193,7 @@ class DownloaderOptions(TypedDict): block_size: int num_workers: int metadata_format: dict[str, str] + banned_mirrors_pattern: str | None class FavouriteMetaData(TypedDict): diff --git a/yutto/processor/downloader.py b/yutto/processor/downloader.py index 8d5170ea6..c8b1188de 100644 --- a/yutto/processor/downloader.py +++ b/yutto/processor/downloader.py @@ -2,8 +2,10 @@ import asyncio import os +import re from enum import Enum from pathlib import Path +from typing import Callable import httpx @@ -86,6 +88,20 @@ def show_audios_info(audios: list[AudioUrlMeta], selected: int): Logger.info(log) +def create_mirrors_filter(banned_mirrors_pattern: str | None) -> Callable[[list[str]], list[str]]: + mirror_filter: Callable[[str], bool] + if banned_mirrors_pattern is None: + mirror_filter = lambda _: True # noqa: E731 + else: + regex_banned_pattern = re.compile(banned_mirrors_pattern) + mirror_filter = lambda url: not regex_banned_pattern.search(url) # noqa: E731 + + def mirrors_filter(mirrors: list[str]) -> list[str]: + return list(filter(mirror_filter, mirrors)) + + return mirrors_filter + + async def download_video_and_audio( client: httpx.AsyncClient, video: VideoUrlMeta | None, @@ -99,13 +115,21 @@ async def download_video_and_audio( buffers: list[AsyncFileBuffer | None] = [None, None] sizes: list[int | None] = [None, None] coroutines_list: list[list[CoroutineWrapper[None]]] = [] + mirrors_filter = create_mirrors_filter(options["banned_mirrors_pattern"]) Fetcher.set_semaphore(options["num_workers"]) if video is not None: vbuf = await AsyncFileBuffer(video_path, overwrite=options["overwrite"]) vsize = await Fetcher.get_size(client, video["url"]) video_coroutines = [ CoroutineWrapper( - Fetcher.download_file_with_offset(client, video["url"], video["mirrors"], vbuf, offset, block_size) + Fetcher.download_file_with_offset( + client, + video["url"], + mirrors_filter(video["mirrors"]), + vbuf, + offset, + block_size, + ) ) for offset, block_size in slice_blocks(vbuf.written_size, vsize, options["block_size"]) ] @@ -117,7 +141,14 @@ async def download_video_and_audio( asize = await Fetcher.get_size(client, audio["url"]) audio_coroutines = [ CoroutineWrapper( - Fetcher.download_file_with_offset(client, audio["url"], audio["mirrors"], abuf, offset, block_size) + Fetcher.download_file_with_offset( + client, + audio["url"], + mirrors_filter(audio["mirrors"]), + abuf, + offset, + block_size, + ) ) for offset, block_size in slice_blocks(abuf.written_size, asize, options["block_size"]) ] diff --git a/yutto/utils/fetcher.py b/yutto/utils/fetcher.py index 13fd54fab..5b5eb619d 100644 --- a/yutto/utils/fetcher.py +++ b/yutto/utils/fetcher.py @@ -200,7 +200,7 @@ async def download_file_with_offset( size: int | None, ) -> None: async with cls.semaphore: - Logger.debug(f"Start download (offset {offset}) {url}") + Logger.debug(f"Start download (offset {offset}, number of mirrors {len(mirrors)}) {url}") done = False headers = client.headers.copy() url_pool = [url] + mirrors @@ -229,10 +229,12 @@ async def download_file_with_offset( except httpx.TimeoutException: Logger.warning(f"文件 {file_buffer.file_path} 下载超时,尝试重新连接...") + Logger.debug(f"超时链接:{url}") except httpx.HTTPError as e: await asyncio.sleep(0.5) error_type = e.__class__.__name__ Logger.warning(f"文件 {file_buffer.file_path} 下载出错({error_type}),尝试重新连接...") + Logger.debug(f"超时链接:{url}") def create_client(