Skip to content

Commit

Permalink
Move audio util
Browse files Browse the repository at this point in the history
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 committed Dec 25, 2024
1 parent b384a4c commit fdf13b0
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 12 deletions.
12 changes: 12 additions & 0 deletions vllm/multimodal/audio.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

import numpy as np
import numpy.typing as npt

Expand Down Expand Up @@ -26,6 +28,16 @@ def _default_max_multimodal_tokens(self, ctx: InputContext) -> int:
"There is no default maximum multimodal tokens")


def try_import_audio_packages() -> tuple[Any, Any]:
    """Import the optional audio dependencies on demand.

    The imports happen inside the function so that importing this module
    does not itself require the audio packages.

    Returns:
        The imported ``librosa`` and ``soundfile`` modules, in that order.

    Raises:
        ImportError: If either package cannot be imported. The original
            import failure is attached as the exception's cause.
    """
    try:
        import librosa
        import soundfile
    except ImportError as import_err:
        # Replace the low-level failure with an actionable install hint,
        # keeping the underlying error chained for debugging.
        hint = "Please install vllm[audio] for audio support."
        raise ImportError(hint) from import_err
    else:
        return librosa, soundfile


def resample_audio(
audio: npt.NDArray[np.floating],
*,
Expand Down
13 changes: 2 additions & 11 deletions vllm/multimodal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
from functools import lru_cache
from io import BytesIO
from typing import Any, List, Optional, Tuple, TypeVar, Union
from typing import List, Optional, Tuple, TypeVar, Union

import numpy as np
import numpy.typing as npt
Expand All @@ -14,6 +14,7 @@
from vllm.logger import init_logger
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer

from .audio import try_import_audio_packages
from .inputs import MultiModalDataDict, PlaceholderRange
from .video import try_import_video_packages

Expand Down Expand Up @@ -205,16 +206,6 @@ async def async_fetch_video(video_url: str,
return video


def try_import_audio_packages() -> Tuple[Any, Any]:
    """Lazily load the optional audio packages.

    Returns:
        A 2-tuple ``(librosa, soundfile)`` of the imported modules.

    Raises:
        ImportError: When ``librosa`` or ``soundfile`` fails to import;
            the underlying import error is chained as the cause.
    """
    try:
        import librosa
        import soundfile
    except ImportError as missing:
        # Surface an install hint instead of the bare module-not-found error.
        raise ImportError(
            "Please install vllm[audio] for audio support.") from missing

    audio_modules = (librosa, soundfile)
    return audio_modules


def fetch_audio(audio_url: str) -> Tuple[np.ndarray, Union[int, float]]:
"""
Load audio from a URL.
Expand Down
2 changes: 1 addition & 1 deletion vllm/multimodal/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _default_max_multimodal_tokens(self, ctx: InputContext) -> int:
return 4096


def try_import_video_packages() -> Any:
def try_import_video_packages() -> tuple[Any, Any]:
try:
import cv2
import decord
Expand Down

0 comments on commit fdf13b0

Please sign in to comment.