Skip to content

Commit

Permalink
新增:对接F5-TTS gradio API(20241023主线版本)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Oct 23, 2024
1 parent 6a1d09a commit 5c926ef
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 4 deletions.
13 changes: 12 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,16 @@
"streaming": 0
}
},
"f5_tts": {
"gradio_ip_port": "http://127.0.0.1:7860",
"type": "gradio_1023",
"ref_audio_orig": "F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav",
"ref_text": "整整策划了半年了,终于现在有结果了",
"model": "F5-TTS",
"remove_silence": false,
"cross_fade_duration": 0.15,
"speed": 1
},
"choose_song": {
"enable": false,
"similarity": 0.5,
Expand Down Expand Up @@ -1993,7 +2003,8 @@
"azure_tts": true,
"fish_speech": true,
"chattts": true,
"cosyvoice": true
"cosyvoice": true,
"f5_tts": true
},
"svc": {
"ddsp_svc": true,
Expand Down
13 changes: 12 additions & 1 deletion config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,16 @@
"streaming": 0
}
},
"f5_tts": {
"gradio_ip_port": "http://127.0.0.1:7860",
"type": "gradio_1023",
"ref_audio_orig": "F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav",
"ref_text": "整整策划了半年了,终于现在有结果了",
"model": "F5-TTS",
"remove_silence": false,
"cross_fade_duration": 0.15,
"speed": 1
},
"choose_song": {
"enable": false,
"similarity": 0.5,
Expand Down Expand Up @@ -1993,7 +2003,8 @@
"azure_tts": true,
"fish_speech": true,
"chattts": true,
"cosyvoice": true
"cosyvoice": true,
"f5_tts": true
},
"svc": {
"ddsp_svc": true,
Expand Down
14 changes: 14 additions & 0 deletions tests/test_gradio/f5_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Manual smoke test: call the ``/infer`` endpoint of a local Gradio server.

The script connects to the Gradio address hard-coded below, submits one
synthesis request using a reference audio clip and its transcript, and
prints whatever the endpoint returns. It needs the server to be running
and the reference audio file to exist at the given path.
"""
from gradio_client import Client, handle_file

# Connect first so a missing server fails before the audio file is touched.
client = Client("http://127.0.0.1:7860/")

# Keyword arguments forwarded verbatim to the remote endpoint.
infer_kwargs = {
    "ref_audio_orig": handle_file('F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav'),
    "ref_text": "整整策划了半年了,终于现在有结果了",
    "gen_text": "你好",
    "model": "F5-TTS",
    "remove_silence": False,
    "cross_fade_duration": 0.15,
    "speed": 1,
}

result = client.predict(api_name="/infer", **infer_kwargs)
print(result)
32 changes: 31 additions & 1 deletion utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,7 @@ async def tts_handle(self, message):

voice_tmp_path = await self.my_tts.chattts_api(data)
elif message["tts_type"] == "cosyvoice":
logger.info(message)
logger.debug(message)
data = {
"type": message["data"]["type"],
"gradio_ip_port": message["data"]["gradio_ip_port"],
Expand All @@ -1108,7 +1108,21 @@ async def tts_handle(self, message):
}

voice_tmp_path = await self.my_tts.cosyvoice_api(data)
elif message["tts_type"] == "f5_tts":
logger.debug(message)
data = {
"type": message["data"]["type"],
"gradio_ip_port": message["data"]["gradio_ip_port"],
"ref_audio_orig": message["data"]["ref_audio_orig"],
"ref_text": message["data"]["ref_text"],
"model": message["data"]["model"],
"remove_silence": message["data"]["remove_silence"],
"cross_fade_duration": message["data"]["cross_fade_duration"],
"speed": message["data"]["speed"],
"content": message["content"],
}

voice_tmp_path = await self.my_tts.f5_tts_api(data)
elif message["tts_type"] == "none":
# Audio.voice_tmp_path_queue.put(message)
voice_tmp_path = None
Expand Down Expand Up @@ -2088,6 +2102,22 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type="
}
# 调用接口合成语音
voice_tmp_path = await self.my_tts.cosyvoice_api(data)
elif audio_synthesis_type == "f5_tts":
data = {
"type": self.config.get("f5_tts", "type"),
"gradio_ip_port": self.config.get("f5_tts", "gradio_ip_port"),
"ref_audio_orig": self.config.get("f5_tts", "ref_audio_orig"),
"ref_text": self.config.get("f5_tts", "ref_text"),
"model": self.config.get("f5_tts", "model"),
"remove_silence": self.config.get("f5_tts", "remove_silence"),
"cross_fade_duration": self.config.get("f5_tts", "cross_fade_duration"),
"speed": self.config.get("f5_tts", "speed"),
"content": content
}
# 调用接口合成语音
voice_tmp_path = await self.my_tts.f5_tts_api(data)



return voice_tmp_path

Expand Down
42 changes: 42 additions & 0 deletions utils/audio_handle/my_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1321,3 +1321,45 @@ async def cosyvoice_api(self, data):

return None

# F5-TTS (gradio_client 0.16.4 is too old and does not work with this API)
async def f5_tts_api(self, data):
    """Synthesize speech through the Gradio API of an F5-TTS WebUI.

    Args:
        data (dict): Request parameters. The keys read here are ``type``,
            ``gradio_ip_port``, ``ref_audio_orig``, ``ref_text``,
            ``content`` (the text to synthesize), ``model``,
            ``remove_silence``, ``cross_fade_duration`` and ``speed``.
            Only ``type == "gradio_1023"`` is handled.

    Returns:
        The value returned by ``self.common.move_file`` for the generated
        audio (presumably the new file path -- confirm against that
        helper), or ``None`` when the type is not handled, the endpoint
        returns an empty result, or any exception is raised.
    """
    try:
        if data["type"] != "gradio_1023":
            return None

        # Imported lazily so the dependency is only needed when this
        # backend is actually used.
        from gradio_client import Client, handle_file

        client = Client(data["gradio_ip_port"])

        # NOTE(review): predict() is called synchronously inside this
        # coroutine, so the event loop waits while the request runs --
        # confirm this is acceptable for the callers.
        result = client.predict(
            ref_audio_orig=handle_file(data["ref_audio_orig"]),
            ref_text=data["ref_text"],
            gen_text=data["content"],
            model=data["model"],
            remove_silence=data["remove_silence"],
            cross_fade_duration=float(data["cross_fade_duration"]),
            speed=float(data["speed"]),
            api_name="/infer"
        )

        if not result:
            return None

        # The first element of a sequence result is used as the audio
        # path; a bare string is used as-is instead of being indexed
        # (which would yield only its first character).
        if isinstance(result, (list, tuple)):
            voice_tmp_path = result[0]
        else:
            voice_tmp_path = result

        # Build the name once so the destination path and the file name
        # passed to move_file can never disagree.
        file_name = 'f5_tts_' + self.common.get_bj_time(4)
        return self.common.move_file(
            voice_tmp_path,
            os.path.join(self.audio_out_path, file_name),
            file_name,
        )

    except Exception as e:
        # Best-effort backend: log the failure and fall through to None.
        logger.error(traceback.format_exc())
        logger.error(f'F5-TTS未知错误,请检查您的F5-TTS WebUI是否启动/配置是否正确,报错内容: {e}')

    return None


44 changes: 43 additions & 1 deletion webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -2769,6 +2769,16 @@ def update_config(config_mapping, config, config_data, type="common_config"):
"speed": (input_cosyvoice_api_0819_speed, 'float'),
},
},
"f5_tts": {
"type": (select_f5_tts_type, 'str'),
"gradio_ip_port": (input_f5_tts_gradio_ip_port, 'str'),
"ref_audio_orig": (input_f5_tts_ref_audio_orig, 'str'),
"ref_text": (input_f5_tts_ref_text, 'str'),
"model": (select_f5_tts_model, 'str'),
"remove_silence": (switch_f5_tts_remove_silence, 'bool'),
"cross_fade_duration": (input_f5_tts_cross_fade_duration, 'float'),
"speed": (input_f5_tts_speed, 'float'),
},
}
config_data = update_config(config_mapping, config, config_data, "tts")

Expand Down Expand Up @@ -3202,6 +3212,7 @@ def update_config(config_mapping, config, config_data, type="common_config"):
"fish_speech": (switch_webui_show_card_tts_fish_speech, 'bool'),
"chattts": (switch_webui_show_card_tts_chattts, 'bool'),
"cosyvoice": (switch_webui_show_card_tts_cosyvoice, 'bool'),
"f5_tts": (switch_webui_show_card_tts_f5_tts, 'bool'),
},
"svc": {
"ddsp_svc": (switch_webui_show_card_svc_ddsp_svc, 'bool'),
Expand Down Expand Up @@ -3346,6 +3357,7 @@ def save_config():
'fish_speech': 'fish_speech',
'chattts': 'ChatTTS',
'cosyvoice': 'CosyVoice',
'f5_tts': 'F5-TTS',
}

# 聊天类型所有配置项
Expand Down Expand Up @@ -6091,7 +6103,36 @@ async def fish_speech_load_model(data):
input_cosyvoice_api_0819_speaker = ui.input(label='说话人', value=config.get("cosyvoice", "api_0819", "speaker"), placeholder='').style("width:200px;").tooltip("自行查看")
input_cosyvoice_api_0819_new = ui.input(label='new', value=config.get("cosyvoice", "api_0819", "new"), placeholder='0').style("width:200px;").tooltip("自行查看")
input_cosyvoice_api_0819_speed = ui.input(label='语速', value=config.get("cosyvoice", "api_0819", "speed"), placeholder='1').style("width:200px;").tooltip("语速")


if config.get("webui", "show_card", "tts", "f5_tts"):
with ui.card().style(card_css):
ui.label("F5-TTS")
with ui.row():
select_f5_tts_type = ui.select(
label='类型',
options={"gradio_1023": "gradio_1023"},
value=config.get("f5_tts", "type")
).style("width:150px").tooltip("对接的API类型")
input_f5_tts_gradio_ip_port = ui.input(
label='Gradio API地址',
value=config.get("f5_tts", "gradio_ip_port"),
placeholder='官方webui程序启动后gradio监听的地址',
validation={
'请输入正确格式的URL': lambda value: common.is_url_check(value),
}
).style("width:200px;").tooltip("对接webui的gradio接口,填webui的地址")

select_f5_tts_model = ui.select(
label='模型',
options={'F5-TTS': 'F5-TTS', 'E2-TTS': 'E2-TTS'},
value=config.get("f5_tts", "model")
).style("width:100px;")
input_f5_tts_ref_audio_orig = ui.input(label='参考音频路径', value=config.get("f5_tts", "ref_audio_orig"), placeholder='例如:E:\\1.wav').style("width:200px;").tooltip("参考音频路径")
input_f5_tts_ref_text = ui.input(label='参考文本', value=config.get("f5_tts", "ref_text"), placeholder='音频的文本').style("width:200px;").tooltip("参考文本,例如:E:\\1.wav")
switch_f5_tts_remove_silence = ui.switch('remove_silence', value=config.get("f5_tts", "remove_silence")).style(switch_internal_css)
input_f5_tts_cross_fade_duration = ui.input(label='cross_fade_duration', value=config.get("f5_tts", "cross_fade_duration"), placeholder='0.15').style("width:100px;").tooltip("cross_fade_duration")
input_f5_tts_speed = ui.input(label='语速', value=config.get("f5_tts", "speed"), placeholder='语速').style("width:100px;").tooltip("语速,默认:1")

with ui.tab_panel(svc_page).style(tab_panel_css):
if config.get("webui", "show_card", "svc", "ddsp_svc"):
with ui.card().style(card_css):
Expand Down Expand Up @@ -7228,6 +7269,7 @@ def update_echart_gift():
switch_webui_show_card_tts_fish_speech = ui.switch('fish_speech', value=config.get("webui", "show_card", "tts", "fish_speech")).style(switch_internal_css)
switch_webui_show_card_tts_chattts = ui.switch('ChatTTS', value=config.get("webui", "show_card", "tts", "chattts")).style(switch_internal_css)
switch_webui_show_card_tts_cosyvoice = ui.switch('CosyVoice', value=config.get("webui", "show_card", "tts", "cosyvoice")).style(switch_internal_css)
switch_webui_show_card_tts_f5_tts = ui.switch('F5-TTS', value=config.get("webui", "show_card", "tts", "f5_tts")).style(switch_internal_css)

with ui.card().style(card_css):
ui.label("变声")
Expand Down

0 comments on commit 5c926ef

Please sign in to comment.