From fdf13b0b136680e64cff4797e6307e90d320224f Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Wed, 25 Dec 2024 15:15:12 +0000 Subject: [PATCH] Move audio util Signed-off-by: DarkLight1337 --- vllm/multimodal/audio.py | 12 ++++++++++++ vllm/multimodal/utils.py | 13 ++----------- vllm/multimodal/video.py | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/vllm/multimodal/audio.py b/vllm/multimodal/audio.py index c92deddbcb255..314d21b746236 100644 --- a/vllm/multimodal/audio.py +++ b/vllm/multimodal/audio.py @@ -1,3 +1,5 @@ +from typing import Any + import numpy as np import numpy.typing as npt @@ -26,6 +28,16 @@ def _default_max_multimodal_tokens(self, ctx: InputContext) -> int: "There is no default maximum multimodal tokens") +def try_import_audio_packages() -> tuple[Any, Any]: + try: + import librosa + import soundfile + except ImportError as exc: + raise ImportError( + "Please install vllm[audio] for audio support.") from exc + return librosa, soundfile + + def resample_audio( audio: npt.NDArray[np.floating], *, diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index da1110dce5b44..12d3823eb9765 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -2,7 +2,7 @@ import os from functools import lru_cache from io import BytesIO -from typing import Any, List, Optional, Tuple, TypeVar, Union +from typing import List, Optional, Tuple, TypeVar, Union import numpy as np import numpy.typing as npt @@ -14,6 +14,7 @@ from vllm.logger import init_logger from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer +from .audio import try_import_audio_packages from .inputs import MultiModalDataDict, PlaceholderRange from .video import try_import_video_packages @@ -205,16 +206,6 @@ async def async_fetch_video(video_url: str, return video -def try_import_audio_packages() -> Tuple[Any, Any]: - try: - import librosa - import soundfile - except ImportError as exc: - raise ImportError( - "Please install vllm[audio] for audio support.") from exc - return librosa, soundfile - - def fetch_audio(audio_url: str) -> Tuple[np.ndarray, Union[int, float]]: """ Load audio from a URL. diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py index 7f967d0afa52f..bfcdef70718bc 100644 --- a/vllm/multimodal/video.py +++ b/vllm/multimodal/video.py @@ -78,7 +78,7 @@ def _default_max_multimodal_tokens(self, ctx: InputContext) -> int: return 4096 -def try_import_video_packages() -> Any: +def try_import_video_packages() -> tuple[Any, Any]: try: import cv2 import decord