Skip to content

Commit

Permalink
Disable chunking availability if low-memory #43
Browse files Browse the repository at this point in the history
  • Loading branch information
kristiankielhofner committed Apr 10, 2023
1 parent 4e32596 commit e484775
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ async def create_datagram_endpoint(self, protocol_factory,
# model threads
model_threads = settings.model_threads

# Default to supporting chunking
has_chunking = True

# Try CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"

Expand All @@ -151,6 +154,12 @@ async def create_datagram_endpoint(self, protocol_factory,
logger.info(f'CUDA: Device {cuda_dev_num} total memory: {cuda_total_memory} bytes')
logger.info(f'CUDA: Device {cuda_dev_num} free memory: {cuda_free_memory} bytes')

# Disable chunking if the card has 10 GB or less of free VRAM (threshold is a complete guess)
# This can still encounter out of memory errors depending on audio length
if cuda_free_memory <= 10000000000:
logger.warning(f'CUDA: Device {cuda_dev_num} has low memory, disabling chunking support')
has_chunking = False

# Override compute_type if at least one non-Turing card
if cuda_device_capability <= 70:
logger.warning(f'CUDA: Device {cuda_dev_num} is pre-Turing, forcing int8')
Expand Down Expand Up @@ -255,8 +264,11 @@ def do_whisper(audio_file, model, beam_size, task, detect_language, return_langu
beam_size = long_beam_size
use_chunking = False
if audio_duration > 30*1000:
logger.debug(f'WHISPER: Audio duration is > 30s - activating chunking')
use_chunking = True
if has_chunking:
logger.debug(f'WHISPER: Audio duration is > 30s - activating chunking')
use_chunking = True
else:
logger.warning(f'WHISPER: Audio duration is > 30s but chunking is not available. Will truncate!')

time_end = datetime.datetime.now()
infer_time = time_end - first_time_start
Expand Down

0 comments on commit e484775

Please sign in to comment.