From 1ac94c67565fa09c6580244a3887d5424f1915b0 Mon Sep 17 00:00:00 2001
From: Ethan <108598670+echo-lalia@users.noreply.github.com>
Date: Sun, 20 Oct 2024 23:28:34 -0700
Subject: [PATCH] Modify default max tokens for whisper

This adjustment was made based on this error printed when running this
script:

`The length of `decoder_input_ids`, including special start tokens, prompt
tokens, and previous tokens, is 2, and `max_new_tokens` is 512. Thus, the
combined length of `decoder_input_ids` and `max_new_tokens` is: 514. This
exceeds the `max_target_positions` of the Whisper model: 448`
---
 vid2cleantxt/transcribe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vid2cleantxt/transcribe.py b/vid2cleantxt/transcribe.py
index c125baf..b75a15e 100644
--- a/vid2cleantxt/transcribe.py
+++ b/vid2cleantxt/transcribe.py
@@ -233,7 +233,7 @@ def transcribe_video_whisper(
     clip_directory,
     clip_name: str,
     chunk_dur: int = 30,
-    chunk_max_new_tokens=512,
+    chunk_max_new_tokens=446,
     temp_dir: str = "audio_chunks",
     manually_clear_cuda_cache=False,
     print_memory_usage=False,
@@ -247,7 +247,7 @@ def transcribe_video_whisper(
     :param clip_directory: the directory of the video file
     :param str clip_name: the name of the video file
     :param int chunk_dur: the duration of each chunk in seconds, default 30
-    :param int chunk_max_new_tokens: max new tokens generated per chunk, default 512 (arbitrary upper bound)
+    :param int chunk_max_new_tokens: max new tokens generated per chunk, default 446 (arbitrary upper bound)
     :param str temp_dir: the directory to store the audio chunks in. default "audio_chunks"
     :param bool manually_clear_cuda_cache: whether to manually clear the cuda cache after each chunk. default False
     :param bool print_memory_usage: whether to print the memory usage at set interval while transcribing. default False
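
The new default follows from Whisper's decoder limit: max_target_positions
(448) minus the 2 tokens already occupied by decoder_input_ids leaves 446 new
tokens per chunk. Below is a minimal sketch of deriving that value from the
model config rather than hard-coding it; it assumes the Hugging Face
transformers WhisperForConditionalGeneration API and uses an illustrative
"openai/whisper-base.en" checkpoint, neither of which appears in this patch.

    # Sketch: derive a safe max_new_tokens from the model's decoder limit
    # instead of hard-coding 446. The start-token count of 2 matches the
    # quoted error message ("including special start tokens ... is 2").
    from transformers import WhisperForConditionalGeneration

    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base.en")

    start_tokens = 2  # special start/prompt tokens already in decoder_input_ids
    safe_max_new_tokens = model.config.max_target_positions - start_tokens
    print(safe_max_new_tokens)  # 448 - 2 = 446 for standard Whisper checkpoints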