From 1ac94c67565fa09c6580244a3887d5424f1915b0 Mon Sep 17 00:00:00 2001
From: Ethan <108598670+echo-lalia@users.noreply.github.com>
Date: Sun, 20 Oct 2024 23:28:34 -0700
Subject: [PATCH] Modify default max tokens for whisper

This adjustment was made based on this error printed when running this
script:

`The length of `decoder_input_ids`, including special start tokens, prompt
tokens, and previous tokens, is 2, and `max_new_tokens` is 512. Thus, the
combined length of `decoder_input_ids` and `max_new_tokens` is: 514. This
exceeds the `max_target_positions` of the Whisper model: 448`
---
 vid2cleantxt/transcribe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vid2cleantxt/transcribe.py b/vid2cleantxt/transcribe.py
index c125baf..b75a15e 100644
--- a/vid2cleantxt/transcribe.py
+++ b/vid2cleantxt/transcribe.py
@@ -233,7 +233,7 @@ def transcribe_video_whisper(
     clip_directory,
     clip_name: str,
     chunk_dur: int = 30,
-    chunk_max_new_tokens=512,
+    chunk_max_new_tokens=446,
     temp_dir: str = "audio_chunks",
     manually_clear_cuda_cache=False,
     print_memory_usage=False,
@@ -247,7 +247,7 @@ def transcribe_video_whisper(
     :param clip_directory: the directory of the video file
     :param str clip_name: the name of the video file
     :param int chunk_dur: the duration of each chunk in seconds, default 30
-    :param int chunk_max_new_tokens: max new tokens generated per chunk, default 512 (arbitrary upper bound)
+    :param int chunk_max_new_tokens: max new tokens generated per chunk, default 446 (arbitrary upper bound)
     :param str temp_dir: the directory to store the audio chunks in. default "audio_chunks"
     :param bool manually_clear_cuda_cache: whether to manually clear the cuda cache after each chunk. default False
     :param bool print_memory_usage: whether to print the memory usage at set interval while transcribing. default False
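
The new default follows from Whisper's decoder limit: max_target_positions
(448) minus the 2 tokens already occupied by decoder_input_ids leaves 446 new
tokens per chunk. Below is a minimal sketch of deriving that value from the
model config rather than hard-coding it; it assumes the Hugging Face
transformers WhisperForConditionalGeneration API and uses an illustrative
"openai/whisper-base.en" checkpoint, neither of which appears in this patch.

    # Sketch: derive a safe max_new_tokens from the model's decoder limit
    # instead of hard-coding 446. The start-token count of 2 matches the
    # quoted error message ("including special start tokens ... is 2").
    from transformers import WhisperForConditionalGeneration

    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base.en")

    start_tokens = 2  # special start/prompt tokens already in decoder_input_ids
    safe_max_new_tokens = model.config.max_target_positions - start_tokens
    print(safe_max_new_tokens)  # 448 - 2 = 446 for standard Whisper checkpoints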