Skip to content

Commit

Permalink
✨ feat: allow ban resource mirror via --banned-mirrors-pattern opti…
Browse files Browse the repository at this point in the history
…on (#272)
  • Loading branch information
SigureMo authored May 30, 2024
1 parent 8fcd954 commit 0bec986
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 3 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,13 @@ cat ~/.yutto_alias | yutto tensura-nikki --batch --alias-file -

设置两话之间的下载间隔(单位为秒),避免短时间内下载大量视频导致账号被封禁

#### 禁用下载镜像

- 参数 `--banned-mirrors-pattern`
- 默认值 `None`

使用正则表达式禁用特定镜像,比如 `--banned-mirrors-pattern "mirrorali"` 将禁用 URL 中包含 `mirrorali` 的镜像

#### 不显示颜色

- 参数 `--no-color`
Expand Down
2 changes: 2 additions & 0 deletions yutto/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def cli() -> argparse.ArgumentParser:
"--metadata-format-premiered", default=TIME_DATE_FMT, help="专用于 metadata 文件中 premiered 字段的日期格式"
)
group_common.add_argument("--download-interval", default=0, type=int, help="设置下载间隔,单位为秒")
group_common.add_argument("--banned-mirrors-pattern", default=None, help="禁用下载链接的镜像源,使用正则匹配")

# 资源选择
group_common.add_argument(
Expand Down Expand Up @@ -349,6 +350,7 @@ async def run(args_list: list[argparse.Namespace]):
"premiered": args.metadata_format_premiered,
"dateadded": TIME_FULL_FMT,
},
"banned_mirrors_pattern": args.banned_mirrors_pattern,
},
)
Logger.new_line()
Expand Down
1 change: 1 addition & 0 deletions yutto/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ class DownloaderOptions(TypedDict):
block_size: int
num_workers: int
metadata_format: dict[str, str]
banned_mirrors_pattern: str | None


class FavouriteMetaData(TypedDict):
Expand Down
35 changes: 33 additions & 2 deletions yutto/processor/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import asyncio
import os
import re
from enum import Enum
from pathlib import Path
from typing import Callable

import httpx

Expand Down Expand Up @@ -86,6 +88,20 @@ def show_audios_info(audios: list[AudioUrlMeta], selected: int):
Logger.info(log)


def create_mirrors_filter(banned_mirrors_pattern: str | None) -> Callable[[list[str]], list[str]]:
    """Build a function that removes banned mirror URLs from a list.

    Args:
        banned_mirrors_pattern: Source of a regular expression. A mirror URL is
            banned when the pattern matches anywhere inside it (``re.search``
            semantics). ``None`` disables filtering entirely.

    Returns:
        A callable that takes a list of mirror URLs and returns a new list
        holding only the allowed URLs, in their original order.

    Raises:
        re.error: If ``banned_mirrors_pattern`` is not a valid regular expression.
    """
    # Compile once up front so every call of the returned filter reuses the
    # same compiled pattern instead of re-parsing the expression.
    banned = None if banned_mirrors_pattern is None else re.compile(banned_mirrors_pattern)

    def mirrors_filter(mirrors: list[str]) -> list[str]:
        """Return the mirrors that are not matched by the banned pattern."""
        if banned is None:
            # No pattern configured: keep everything, but still hand back a
            # fresh list so callers never alias the input.
            return list(mirrors)
        return [url for url in mirrors if not banned.search(url)]

    return mirrors_filter


async def download_video_and_audio(
client: httpx.AsyncClient,
video: VideoUrlMeta | None,
Expand All @@ -99,13 +115,21 @@ async def download_video_and_audio(
buffers: list[AsyncFileBuffer | None] = [None, None]
sizes: list[int | None] = [None, None]
coroutines_list: list[list[CoroutineWrapper[None]]] = []
mirrors_filter = create_mirrors_filter(options["banned_mirrors_pattern"])
Fetcher.set_semaphore(options["num_workers"])
if video is not None:
vbuf = await AsyncFileBuffer(video_path, overwrite=options["overwrite"])
vsize = await Fetcher.get_size(client, video["url"])
video_coroutines = [
CoroutineWrapper(
Fetcher.download_file_with_offset(client, video["url"], video["mirrors"], vbuf, offset, block_size)
Fetcher.download_file_with_offset(
client,
video["url"],
mirrors_filter(video["mirrors"]),
vbuf,
offset,
block_size,
)
)
for offset, block_size in slice_blocks(vbuf.written_size, vsize, options["block_size"])
]
Expand All @@ -117,7 +141,14 @@ async def download_video_and_audio(
asize = await Fetcher.get_size(client, audio["url"])
audio_coroutines = [
CoroutineWrapper(
Fetcher.download_file_with_offset(client, audio["url"], audio["mirrors"], abuf, offset, block_size)
Fetcher.download_file_with_offset(
client,
audio["url"],
mirrors_filter(audio["mirrors"]),
abuf,
offset,
block_size,
)
)
for offset, block_size in slice_blocks(abuf.written_size, asize, options["block_size"])
]
Expand Down
4 changes: 3 additions & 1 deletion yutto/utils/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ async def download_file_with_offset(
size: int | None,
) -> None:
async with cls.semaphore:
Logger.debug(f"Start download (offset {offset}) {url}")
Logger.debug(f"Start download (offset {offset}, number of mirrors {len(mirrors)}) {url}")
done = False
headers = client.headers.copy()
url_pool = [url] + mirrors
Expand Down Expand Up @@ -229,10 +229,12 @@ async def download_file_with_offset(

except httpx.TimeoutException:
Logger.warning(f"文件 {file_buffer.file_path} 下载超时,尝试重新连接...")
Logger.debug(f"超时链接:{url}")
except httpx.HTTPError as e:
await asyncio.sleep(0.5)
error_type = e.__class__.__name__
Logger.warning(f"文件 {file_buffer.file_path} 下载出错({error_type}),尝试重新连接...")
Logger.debug(f"超时链接:{url}")


def create_client(
Expand Down

0 comments on commit 0bec986

Please sign in to comment.