From eb0950f70198a20e12a17dc7af4eb60101e89cdc Mon Sep 17 00:00:00 2001 From: Tao Date: Tue, 19 Nov 2024 16:01:01 +0800 Subject: [PATCH 1/2] fix bug and add openai url config --- settings.cfg.example | 3 ++- srt_translation.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/settings.cfg.example b/settings.cfg.example index 02b7ffc..18bd0e1 100644 --- a/settings.cfg.example +++ b/settings.cfg.example @@ -1,5 +1,6 @@ [option] -#API key for OpenAI API +#API URL and key for OpenAI API +openai-url = https://api.openai.com/v1 openai-apikey = sk- #Target language for translation, e.g. "English", "Chinese", "Japanese" diff --git a/srt_translation.py b/srt_translation.py index 32fca9f..4162451 100644 --- a/srt_translation.py +++ b/srt_translation.py @@ -33,10 +33,12 @@ config.read_string(config_text) # 获取openai_apikey和language +openai_url = config.get('option', 'openai-url') openai_apikey = config.get('option', 'openai-apikey') language_name = config.get('option', 'target-language') # 设置openai的API密钥 +openai.api_base = openai_url openai.api_key = openai_apikey import argparse @@ -90,7 +92,7 @@ def split_text(text): def is_translation_valid(original_text, translated_text): def get_index_lines(text): lines = text.split('\n') - index_lines = [line for line in lines if re.match(r'^\d+$', line.strip())] + index_lines = [line.strip() for line in lines if re.match(r'^\d+$', line.strip())] return index_lines original_index_lines = get_index_lines(original_text) @@ -107,7 +109,7 @@ def translate_text(text): while retries < max_retries: try: completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", messages=[ { "role": "user", From 1dbeaa5a6256c53badb8e9bf76790e9068aae06c Mon Sep 17 00:00:00 2001 From: Tao Date: Tue, 19 Nov 2024 22:59:46 +0800 Subject: [PATCH 2/2] fix bug --- srt_translation.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/srt_translation.py b/srt_translation.py index 4162451..36fae43 100644 --- a/srt_translation.py +++ b/srt_translation.py @@ -95,13 +95,23 @@ def get_index_lines(text): index_lines = [line.strip() for line in lines if re.match(r'^\d+$', line.strip())] return index_lines + def get_subtitle_lines(text): + paras = re.split(r'(\n\s*\n)', text.strip()) + # 判断每段是否至少三行 + subtitle_lines = [para for para in paras if len(para.split('\n')) >= 3] + return subtitle_lines + original_index_lines = get_index_lines(original_text) translated_index_lines = get_index_lines(translated_text) + lines_match = original_index_lines == translated_index_lines + + original_index_para = get_subtitle_lines(original_text) + translated_index_para = get_subtitle_lines(translated_text) + # 判断两个数组个数相等 + paras_match = len(original_index_para) == len(translated_index_para) - print(original_text, original_index_lines) - print(translated_text, translated_index_lines) + return lines_match and paras_match - return original_index_lines == translated_index_lines def translate_text(text): max_retries = 3 retries = 0 @@ -111,9 +121,13 @@ def translate_text(text): completion = openai.ChatCompletion.create( model="gpt-4o-mini", messages=[ + { + "role": "system", + "content": f"Translate the following subtitle text into {language_name}, but keep the subtitle number and timeline unchanged. Keep the format and line breaks. Please pay attention to the context to make whole dialog consistent. Use natural language and avoid word-for-word translation.", + }, { "role": "user", - "content": f"Translate the following subtitle text into {language_name}, but keep the subtitle number and timeline unchanged: \n{text}", + "content": text, } ], )