From 0bec986a3e26bc4b1be4665061d6e26f4cf0a070 Mon Sep 17 00:00:00 2001
From: Nyakku Shigure <sigure.qaq@gmail.com>
Date: Fri, 31 May 2024 00:44:09 +0800
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20allow=20ban=20resource=20mi?=
 =?UTF-8?q?rror=20via=20`--banned-mirrors-pattern`=20option=20(#272)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                     |  7 +++++++
 yutto/__main__.py             |  2 ++
 yutto/_typing.py              |  1 +
 yutto/processor/downloader.py | 35 +++++++++++++++++++++++++++++++++--
 yutto/utils/fetcher.py        |  4 +++-
 5 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 1834d475d..d6ebf05ef 100644
--- a/README.md
+++ b/README.md
@@ -468,6 +468,13 @@ cat ~/.yutto_alias | yutto tensura-nikki --batch --alias-file -
 
 设置两话之间的下载间隔（单位为秒），避免短时间內下载大量视频导致账号被封禁
 
+#### 禁用下载镜像
+
+-  参数 `--banned-mirrors-pattern`
+-  默认值 `None`
+
+使用正则表达式禁用特定镜像，比如 `--banned-mirrors-pattern "mirrorali"` 将禁用 URL 中包含 `mirrorali` 的镜像
+
 #### 不显示颜色
 
 -  参数 `--no-color`
diff --git a/yutto/__main__.py b/yutto/__main__.py
index 22192490b..c9810eb6a 100644
--- a/yutto/__main__.py
+++ b/yutto/__main__.py
@@ -133,6 +133,7 @@ def cli() -> argparse.ArgumentParser:
         "--metadata-format-premiered", default=TIME_DATE_FMT, help="专用于 metadata 文件中 premiered 字段的日期格式"
     )
     group_common.add_argument("--download-interval", default=0, type=int, help="设置下载间隔，单位为秒")
+    group_common.add_argument("--banned-mirrors-pattern", default=None, help="禁用下载链接的镜像源，使用正则匹配")
 
     # 资源选择
     group_common.add_argument(
@@ -349,6 +350,7 @@ async def run(args_list: list[argparse.Namespace]):
                             "premiered": args.metadata_format_premiered,
                             "dateadded": TIME_FULL_FMT,
                         },
+                        "banned_mirrors_pattern": args.banned_mirrors_pattern,
                     },
                 )
                 Logger.new_line()
diff --git a/yutto/_typing.py b/yutto/_typing.py
index 3a39b3b40..cc3074aeb 100644
--- a/yutto/_typing.py
+++ b/yutto/_typing.py
@@ -193,6 +193,7 @@ class DownloaderOptions(TypedDict):
     block_size: int
     num_workers: int
     metadata_format: dict[str, str]
+    banned_mirrors_pattern: str | None
 
 
 class FavouriteMetaData(TypedDict):
diff --git a/yutto/processor/downloader.py b/yutto/processor/downloader.py
index 8d5170ea6..c8b1188de 100644
--- a/yutto/processor/downloader.py
+++ b/yutto/processor/downloader.py
@@ -2,8 +2,10 @@
 
 import asyncio
 import os
+import re
 from enum import Enum
 from pathlib import Path
+from typing import Callable
 
 import httpx
 
@@ -86,6 +88,20 @@ def show_audios_info(audios: list[AudioUrlMeta], selected: int):
         Logger.info(log)
 
 
+def create_mirrors_filter(banned_mirrors_pattern: str | None) -> Callable[[list[str]], list[str]]:
+    """Return a filter that drops mirror URLs matching the banned pattern (``None`` keeps all)."""
+    # Compile once up front so every filtered list reuses the same regex object.
+    banned = None if banned_mirrors_pattern is None else re.compile(banned_mirrors_pattern)
+
+    def is_allowed(mirror_url: str) -> bool:
+        return banned is None or not banned.search(mirror_url)
+
+    def mirrors_filter(mirrors: list[str]) -> list[str]:
+        return [mirror_url for mirror_url in mirrors if is_allowed(mirror_url)]
+
+    return mirrors_filter
+
+
 async def download_video_and_audio(
     client: httpx.AsyncClient,
     video: VideoUrlMeta | None,
@@ -99,13 +115,21 @@ async def download_video_and_audio(
     buffers: list[AsyncFileBuffer | None] = [None, None]
     sizes: list[int | None] = [None, None]
     coroutines_list: list[list[CoroutineWrapper[None]]] = []
+    mirrors_filter = create_mirrors_filter(options["banned_mirrors_pattern"])
     Fetcher.set_semaphore(options["num_workers"])
     if video is not None:
         vbuf = await AsyncFileBuffer(video_path, overwrite=options["overwrite"])
         vsize = await Fetcher.get_size(client, video["url"])
         video_coroutines = [
             CoroutineWrapper(
-                Fetcher.download_file_with_offset(client, video["url"], video["mirrors"], vbuf, offset, block_size)
+                Fetcher.download_file_with_offset(
+                    client,
+                    video["url"],
+                    mirrors_filter(video["mirrors"]),
+                    vbuf,
+                    offset,
+                    block_size,
+                )
             )
             for offset, block_size in slice_blocks(vbuf.written_size, vsize, options["block_size"])
         ]
@@ -117,7 +141,14 @@ async def download_video_and_audio(
         asize = await Fetcher.get_size(client, audio["url"])
         audio_coroutines = [
             CoroutineWrapper(
-                Fetcher.download_file_with_offset(client, audio["url"], audio["mirrors"], abuf, offset, block_size)
+                Fetcher.download_file_with_offset(
+                    client,
+                    audio["url"],
+                    mirrors_filter(audio["mirrors"]),
+                    abuf,
+                    offset,
+                    block_size,
+                )
             )
             for offset, block_size in slice_blocks(abuf.written_size, asize, options["block_size"])
         ]
diff --git a/yutto/utils/fetcher.py b/yutto/utils/fetcher.py
index 13fd54fab..5b5eb619d 100644
--- a/yutto/utils/fetcher.py
+++ b/yutto/utils/fetcher.py
@@ -200,7 +200,7 @@ async def download_file_with_offset(
         size: int | None,
     ) -> None:
         async with cls.semaphore:
-            Logger.debug(f"Start download (offset {offset}) {url}")
+            Logger.debug(f"Start download (offset {offset}, number of mirrors {len(mirrors)}) {url}")
             done = False
             headers = client.headers.copy()
             url_pool = [url] + mirrors
@@ -229,10 +229,12 @@ async def download_file_with_offset(
 
                 except httpx.TimeoutException:
                     Logger.warning(f"文件 {file_buffer.file_path} 下载超时，尝试重新连接...")
+                    Logger.debug(f"超时链接：{url}")
                 except httpx.HTTPError as e:
                     await asyncio.sleep(0.5)
                     error_type = e.__class__.__name__
                     Logger.warning(f"文件 {file_buffer.file_path} 下载出错（{error_type}），尝试重新连接...")
+                    Logger.debug(f"超时链接：{url}")
 
 
 def create_client(