Disable chunking availability if low-memory #43

toverainc · Apr 10, 2023 · e484775
1 parent 4e32596
commit e484775
Showing 1 changed file with 14 additions and 2 deletions.
diff --git a/main.py b/main.py
@@ -125,6 +125,9 @@ async def create_datagram_endpoint(self, protocol_factory,
 # model threads
 model_threads = settings.model_threads
 
+# Default to supporting chunking
+has_chunking = True
+
 # Try CUDA
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -151,6 +154,12 @@ async def create_datagram_endpoint(self, protocol_factory,
         logger.info(f'CUDA: Device {cuda_dev_num} total memory: {cuda_total_memory} bytes')
         logger.info(f'CUDA: Device {cuda_dev_num} free memory: {cuda_free_memory} bytes')
 
+        # Disable chunking if the card has 10 GB (10^10 bytes) or less of free VRAM (threshold is a complete guess)
+        # This can still encounter out of memory errors depending on audio length
+        if cuda_free_memory <= 10000000000:
+            logger.warning(f'CUDA: Device {cuda_dev_num} has low memory, disabling chunking support')
+            has_chunking = False
+
         # Override compute_type if at least one non-Turing card
         if cuda_device_capability <= 70:
             logger.warning(f'CUDA: Device {cuda_dev_num} is pre-Turing, forcing int8')
@@ -255,8 +264,11 @@ def do_whisper(audio_file, model, beam_size, task, detect_language, return_langu
         beam_size = long_beam_size
     use_chunking = False
     if audio_duration > 30*1000:
-        logger.debug(f'WHISPER: Audio duration is > 30s - activating chunking')
-        use_chunking = True
+        if has_chunking:
+            logger.debug(f'WHISPER: Audio duration is > 30s - activating chunking')
+            use_chunking = True
+        else:
+            logger.warning(f'WHISPER: Audio duration is > 30s but chunking is not available. Will truncate!')
 
     time_end = datetime.datetime.now()
     infer_time = time_end - first_time_start