-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathAdjustLrc.py
213 lines (194 loc) · 9.63 KB
/
AdjustLrc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import re
import os
import zhipuai
import json
import requests
if_need_LLM_add_punctuation = True
# 是否需要调用 AI 为文本添加标点符号以便处理,主要用于中文无标点场景。如果为 True,需要在 config.json 中配置相关 API。
# 如果 if_need_LLM_add_punctuation = True, 从 config.json 中读取相关 API
if if_need_LLM_add_punctuation:
with open("config.json", "r") as f:
config = json.load(f)
zhipuai_api_key = config["zhipuai_api_key"]
openai_api_key =config["openai_api_key"]
openai_api_url = config["openai_api_url"]
deepseek_api_key = config["deepseek_api_key"]
# 智谱,垃圾
def add_punctuation_zhipuai(inputText):
zhipuai.api_key = zhipuai_api_key
response = zhipuai.model_api.invoke(
model="chatglm_std",
prompt=[
{"role": "user", "content": f"{inputText}\n\n这是一个音频的转文字结果,以 .lrc 格式存储。请结合语义和上下文为每句话添加标点符号,禁止删除或新增时间点,只需返回修改后的 lrc 文本。"},
],
temperature=0.9, # 值越小结果越稳定
top_p = 0.7
)
# Sample response:{'code': 200, 'msg': '操作成功', 'data': {'request_id': '7941314437787463250', 'task_id': '7941314437787463250', 'task_status': 'SUCCESS', 'choices': [{'role': 'assistant', 'content': '" 说你们想听哪些历史人物?"'}], 'usage': {'total_tokens': 9}}, 'success': True}
# 直接打印 choices 中的第一个 content
# print(response)
# 检验是否成功
if response["success"] == False:
print("请求失败")
return None
else:
print("智谱 AI Token 数量:{},花费{}元".format(response["data"]["usage"]["total_tokens"], response["data"]["usage"]["total_tokens"]/1000*0.002))
outputText = response["data"]["choices"][0]["content"]
# 检查 outputText 最外层是否有双引号,如果有则去掉
if outputText.startswith('"') and outputText.endswith('"'):
outputText = outputText[1:-1]
return outputText
# 调用 gpt3.5 为输入的文本添加标点符号。使用自定义的 api url。函数功能和 add_punctuation_zhipuai 相同。
def add_punctuation_openai(inputText):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {openai_api_key}"
}
data = {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "system", "content": "你是为音频转录生成的 LRC 格式字幕添加标点符号的专家。保留原始单词,仅插入必要的标点符号,例如句号、逗号、大写字母、美元符号或百分号等符号以及格式。如果结合下一行判断此行无需添加标点,则可以不添加标点。仅使用提供的上下文,返回添加标点后的 LRC 格式字幕文本"},
{"role": "user", "content": f"{inputText}"}
],
"frequency_penalty":0,
"presence_penalty":0,
"temperature":0.6,
"top_p":1
}
response = requests.post(f"{openai_api_url}/v1/chat/completions", headers=headers, json=data)
response_data = response.json()
if "choices" in response_data and len(response_data["choices"]) > 0:
# $0.0005 为 prompt_tokens 的单价,$0.0015 为 completion_tokens 的单价,根据单价计算本次请求的总花费:prompt_tokens*0.0015 + completion_tokens*0.002
print("openai api Token 数量:{},花费{}元".format(response_data["usage"]["total_tokens"], response_data["usage"]["prompt_tokens"]/1000*0.0005*7.2 + response_data["usage"]["completion_tokens"]/1000*0.0015*7.2))
outputText = response_data["choices"][0]["message"]["content"]
print('##outputText:\n', outputText)
return outputText
else:
print("response_data", response_data)
# 抛出错误
raise Exception("openai api 返回的数据不正确")
# 使用 deepseek api 添加标点
def add_punctuation_deepseek(inputText):
# https://platform.deepseek.com/api-docs/
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {deepseek_api_key}"
}
data = {
"model": "deepseek-chat",
"messages": [
{"role": "system", "content": "你是为音频转录生成的 LRC 格式字幕添加标点符号的专家。保留原始单词,仅插入必要的标点符号,例如句号、逗号、大写字母、美元符号或百分号等符号以及格式。如果结合下一行判断此行无需添加标点,则可以不添加标点。仅使用提供的上下文,返回添加标点后的 LRC 格式字幕文本"},
{"role": "user", "content": f"{inputText}"}
],
"temperature":0.6
}
response = requests.post("https://api.deepseek.com/chat/completions", headers=headers, json=data)
response_data = response.json()
if "choices" in response_data and len(response_data["choices"]) > 0:
outputText = response_data["choices"][0]["message"]["content"]
print('##outputText:\n', outputText)
return outputText
else:
print("response_data", response_data)
# 抛出错误
raise Exception("deepseek api 返回的数据不正确")
# 为字幕文本添加标点,可以使用智谱AI,也可以使用 chatgpt。输入 captionList,返回 new_groups
def add_punctuation_service(captionList,service='deepseek'):
# 最大中文长度,采取 max_token/2.5 估算。
max_text_length_per_request = 1500
# 将 captionList 的所有字幕文本(groups 里的第二项)拼接成一个字符串列表,每个字符串以换行符连接,长度不能超过 max_text_length_per_request。
text_list = []
text = ''
for i in range(len(captionList)):
if len(text) + len(captionList[i]) < max_text_length_per_request:
text = f"{text}\n{captionList[i]}"
else:
text_list.append(text)
text = ''
text_list.append(text)
# 对 text_list 中每一项调用 AI 为文本添加标点符号。合并所有返回的文本,然后按换行符分割成列表,每一项就是新的字幕文本。
new_text_list = []
for text in text_list:
if service == 'zhipuai':
new_text = add_punctuation_zhipuai(text)
elif service == 'openai':
new_text = add_punctuation_openai(text)
elif service == 'deepseek':
new_text = add_punctuation_deepseek(text)
new_text_list.append(new_text)
new_text = '\n'.join(new_text_list)
print('文本处理后:',new_text)
return new_text
def generate_output_file_path(lrc_file):
# 生成 output_file 的路径和文件名
directory = os.path.dirname(lrc_file)
filename = os.path.basename(lrc_file)
new_filename = filename.replace('.lrc', '_add_punctuation.lrc')
output_file = os.path.join(directory, new_filename)
print(output_file)
return output_file
def lrc2srt_file_path(lrc_file):
# 生成 srt_file 的路径和文件名
directory = os.path.dirname(lrc_file)
filename = os.path.basename(lrc_file)
new_filename = filename.replace('.lrc', '.srt')
srt_file = os.path.join(directory, new_filename)
print(srt_file)
return srt_file
# 将 lrc 格式字幕转换成 srt 格式,输入 lrc 文本,返回 srt 文本。
def parse_lrc_timestamp(timestamp):
try:
parts = timestamp.split(':')
if len(parts) == 3:
hours, minutes, seconds = parts
elif len(parts) == 2:
hours = 0
minutes, seconds = parts
else:
return None
seconds, milliseconds = seconds.split('.')
hours, minutes, seconds, milliseconds = int(hours), int(minutes), int(seconds), int(milliseconds)
return hours * 3600 + minutes * 60 + seconds + milliseconds / 1000
except ValueError:
return None
def format_time(seconds):
hours = int(seconds) // 3600
minutes = (int(seconds) % 3600) // 60
seconds = int(seconds) % 60
milliseconds = int((seconds - int(seconds)) * 1000)
return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
def lrc_to_srt(lrc_content):
subs = []
pattern = r'\[(\d+:\d+:\d+\.\d+|\d+:\d+\.\d+)\](.*)'
matches = re.findall(pattern, lrc_content)
for idx, match in enumerate(matches):
timestamp, text = match
start_time = parse_lrc_timestamp(timestamp)
if start_time is not None:
end_time = parse_lrc_timestamp(matches[idx + 1][0]) if idx + 1 < len(matches) else start_time + 1
subs.append(f"{len(subs) + 1}\n{format_time(start_time)} --> {format_time(end_time)}\n{text.strip()}\n")
return ''.join(subs)
# 主函数,读取 lrc 文件,调用 add_punctuation_service 为字幕文本添加标点,然后将结果写入新的 lrc 文件。
def main():
lrc_file = "D:/我的坚果云/中国城夜总会剿匪记.lrc"
# LLM_service = 'deepseek'
LLM_service = 'openai'
# 读取 lrc 文件
with open(lrc_file, "r", encoding="utf-8") as f:
lrc = f.read()
# 将 lrc 按换行符分割成列表 captaionList,每一项就是一条字幕
captionList = lrc.split("\n")
# print("captionList", captionList)
# newCaption = add_punctuation_service(captionList,LLM_service)
newCaption = lrc
# 将 newCaption 写入新的 lrc 文件
output_file = generate_output_file_path(lrc_file)
with open(output_file, "w", encoding="utf-8") as f:
f.write(newCaption)
# 将 lrc 转换成 srt
srt = lrc_to_srt(newCaption)
# 将 srt 写入新的 srt 文件
with open(lrc2srt_file_path(output_file), "w", encoding="utf-8") as f:
f.write(srt)
if __name__ == "__main__":
main()