-
Notifications
You must be signed in to change notification settings - Fork 852
/
config.yaml
167 lines (134 loc) · 4.34 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# * Settings marked with * are advanced settings: they do not appear in the Streamlit page and can only be changed by editing this config file manually
version: "2.1.2"
## ======================== Basic Settings ======================== ##
# API settings
api:
  # Replace the placeholder with your own API key
  key: 'YOUR_API_KEY'
  base_url: 'https://api.302.ai'
  model: 'gemini-2.0-flash-exp'
# Target language setting; the value is written into the prompt, so it can be described in natural language
target_language: '简体中文'
# Whether to use Demucs for vocal separation before transcription
demucs: false
whisper:
  # One of ["medium", "large-v3", "large-v3-turbo"].
  # Note: for zh, the model is forced to Belle/large-v3
  model: 'large-v3'
  # Recognition language passed to Whisper [en, zh, ...]
  language: 'en'
  # NOTE(review): presumably filled in by the program after language detection - confirm
  detected_language: 'en'
# Video resolution, one of [0x0, 640x360, 1920x1080]; 0x0 generates a 0-second black video placeholder
resolution: '1920x1080'
## ======================== Advanced Settings ======================== ##
# *Default resolution for downloading YouTube videos, one of [360, 1080, best]
ytb_resolution: '1080'
subtitle:
  # *Maximum length of each subtitle line, in characters
  max_length: 75
  # *Translated subtitles are rendered slightly larger than source subtitles;
  # this multiplier adjusts the reference length used for subtitle splitting
  target_multiplier: 1.2
# *Summary length; set it lower (about 2k) if using a local LLM
summary_length: 8000
# *Number of multi-threaded LLM accesses; set to 1 if using a local LLM
max_workers: 4
# *Maximum number of words for the first rough cut: below 18 cuts too finely and hurts translation,
# above 22 is too long and makes subsequent subtitle splitting difficult to align
max_split_length: 20
# *Whether to reflect the translation result in the original text
reflect_translate: true
# *Whether to pause after extracting professional terms and before translation,
# allowing users to manually adjust the terminology table output\log\terminology.json
pause_before_translate: false
## ======================== Dubbing Settings ======================== ##
# TTS backend selection, one of [sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, custom_tts]
tts_method: 'azure_tts'
# SiliconFlow FishTTS
sf_fish_tts:
  # SiliconFlow API key
  api_key: 'YOUR_API_KEY'
  # Only used when mode is "preset"
  voice: 'anna'
  # *Only used when mode is "custom"; do not set manually
  # NOTE(review): the note presumably applies to both custom_name and voice_id - confirm
  custom_name: ''
  voice_id: ''
  # One of: preset, custom, dynamic
  mode: "preset"
# OpenAI TTS-1 API configuration, 302.ai API only
openai_tts:
  api_key: 'YOUR_302_API_KEY'
  voice: 'alloy'
# Azure TTS configuration, 302.ai API only
azure_tts:
  api_key: 'YOUR_302_API_KEY'
  voice: 'zh-CN-YunfengNeural'
# FishTTS configuration, 302.ai API only
fish_tts:
  api_key: 'YOUR_302_API_KEY'
  # Selected character; must be one of the keys of character_id_dict below
  # NOTE(review): lookup by name is assumed from the matching key - confirm against the consumer
  character: 'AD学姐'
  # Maps character name to its ID
  character_id_dict:
    'AD学姐': '7f92f8afb8ec43bf81429cc1c9199cb1'
    '丁真': '54a5170264694bfc8e9ad98df7bd89c3'
# Edge TTS configuration
edge_tts:
  voice: 'zh-CN-XiaoxiaoNeural'
# GPT-SoVITS configuration
gpt_sovits:
  character: 'Huanyuv2'
  refer_mode: 3
# *Audio speed range
speed_factor:
  min: 1
  # Maximum acceptable speed
  accept: 1.2
  max: 1.4
# *Merge audio configuration
# NOTE(review): the duration values below are presumably in seconds - confirm
min_subtitle_duration: 2.5 # Minimum subtitle duration; shorter subtitles are forcibly extended
min_trim_duration: 3.5 # Subtitles shorter than this value will not be split
tolerance: 1.5 # Allowed extension time into the next subtitle
# Volume settings
dub_volume: 1.5 # *Dubbed audio volume (1.5 = 150%; most original dubbing audio is relatively quiet)
## ======================== Additional Settings (do not modify) ======================== ##
# Directory for Whisper models
model_dir: './_model_cache'
# Video file extensions accepted for upload
allowed_video_formats:
  - 'mp4'
  - 'mov'
  - 'avi'
  - 'mkv'
  - 'flv'
  - 'wmv'
  - 'webm'
# Allowed audio file extensions
allowed_audio_formats:
  - 'wav'
  - 'mp3'
  - 'flac'
  - 'm4a'
# Models that support returning JSON-formatted output
llm_support_json:
  - 'gpt-4o'
  - 'gpt-4o-mini'
  - 'gemini-2.0-flash-exp'
  - 'deepseek-coder'
  # Disabled because they have problems:
  # - 'Qwen/Qwen2.5-72B-Instruct'
  # - 'Qwen/Qwen2.5-Coder-32B-Instruct'
  # - 'Qwen/Qwen2.5-Chat-72B-Instruct-128K'
# spaCy models, keyed by language code
spacy_model_map:
  en: 'en_core_web_md'
  ru: 'ru_core_news_md'
  fr: 'fr_core_news_md'
  ja: 'ja_core_news_md'
  es: 'es_core_news_md'
  de: 'de_core_news_md'
  it: 'it_core_news_md'
  zh: 'zh_core_web_md'
# Language codes whose text uses spaces as the word separator
language_split_with_space:
  - 'en'
  - 'es'
  - 'fr'
  - 'de'
  - 'it'
  - 'ru'
# Language codes whose text does not use spaces as the word separator
language_split_without_space:
  - 'zh'
  - 'ja'