"""Manual smoke check for the F5-TTS Gradio ``/infer`` endpoint.

Run this file directly while an F5-TTS WebUI is listening on ``GRADIO_URL``.
It sends one synthesis request and prints whatever the endpoint returns.
"""
from gradio_client import Client, handle_file

# Address of the Gradio server the request is sent to.
GRADIO_URL = "http://127.0.0.1:7860/"
# Reference clip and its transcript; the path is machine-specific and must
# exist on the machine running this script.
REF_AUDIO_PATH = 'F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav'
REF_TEXT = "整整策划了半年了,终于现在有结果了"
# Text to synthesize.
GEN_TEXT = "你好"


def main():
    """Send a single ``/infer`` request and print the raw result."""
    client = Client(GRADIO_URL)
    result = client.predict(
        ref_audio_orig=handle_file(REF_AUDIO_PATH),
        ref_text=REF_TEXT,
        gen_text=GEN_TEXT,
        model="F5-TTS",
        remove_silence=False,
        cross_fade_duration=0.15,
        speed=1,
        api_name="/infer",
    )
    print(result)


# Guarded so that importing this module never opens a network connection.
if __name__ == "__main__":
    main()
a/utils/audio.py +++ b/utils/audio.py @@ -1097,7 +1097,7 @@ async def tts_handle(self, message): voice_tmp_path = await self.my_tts.chattts_api(data) elif message["tts_type"] == "cosyvoice": - logger.info(message) + logger.debug(message) data = { "type": message["data"]["type"], "gradio_ip_port": message["data"]["gradio_ip_port"], @@ -1108,7 +1108,21 @@ async def tts_handle(self, message): } voice_tmp_path = await self.my_tts.cosyvoice_api(data) + elif message["tts_type"] == "f5_tts": + logger.debug(message) + data = { + "type": message["data"]["type"], + "gradio_ip_port": message["data"]["gradio_ip_port"], + "ref_audio_orig": message["data"]["ref_audio_orig"], + "ref_text": message["data"]["ref_text"], + "model": message["data"]["model"], + "remove_silence": message["data"]["remove_silence"], + "cross_fade_duration": message["data"]["cross_fade_duration"], + "speed": message["data"]["speed"], + "content": message["content"], + } + voice_tmp_path = await self.my_tts.f5_tts_api(data) elif message["tts_type"] == "none": # Audio.voice_tmp_path_queue.put(message) voice_tmp_path = None @@ -2088,6 +2102,22 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type=" } # 调用接口合成语音 voice_tmp_path = await self.my_tts.cosyvoice_api(data) + elif audio_synthesis_type == "f5_tts": + data = { + "type": self.config.get("f5_tts", "type"), + "gradio_ip_port": self.config.get("f5_tts", "gradio_ip_port"), + "ref_audio_orig": self.config.get("f5_tts", "ref_audio_orig"), + "ref_text": self.config.get("f5_tts", "ref_text"), + "model": self.config.get("f5_tts", "model"), + "remove_silence": self.config.get("f5_tts", "remove_silence"), + "cross_fade_duration": self.config.get("f5_tts", "cross_fade_duration"), + "speed": self.config.get("f5_tts", "speed"), + "content": content + } + # 调用接口合成语音 + voice_tmp_path = await self.my_tts.f5_tts_api(data) + + return voice_tmp_path diff --git a/utils/audio_handle/my_tts.py b/utils/audio_handle/my_tts.py index 4c2634b2..740d1466 
# F5-TTS (original author's note: gradio_client 0.16.4 is too old to be usable here)
async def f5_tts_api(self, data):
    """Synthesize speech through an F5-TTS Gradio WebUI.

    Only ``data["type"] == "gradio_1023"`` is handled; any other type is
    logged and yields ``None``.

    Args:
        data (dict): Request parameters. Keys read by this method:
            ``type``, ``gradio_ip_port``, ``ref_audio_orig``, ``ref_text``,
            ``content`` (sent as ``gen_text``), ``model``,
            ``remove_silence``, ``cross_fade_duration`` and ``speed``
            (the last two are converted with ``float()``).

    Returns:
        The value returned by ``self.common.move_file`` (presumably the path
        of the relocated audio file -- confirm against that helper), or
        ``None`` when the type is unsupported, the endpoint returns an empty
        result, or any exception is raised.
    """
    import asyncio

    try:
        if data["type"] != "gradio_1023":
            logger.error(f'F5-TTS不支持的类型: {data["type"]}')
            return None

        from gradio_client import Client, handle_file

        def _infer():
            # Both the client construction and predict() perform blocking
            # network I/O, so they are kept together in one worker call.
            client = Client(data["gradio_ip_port"])
            return client.predict(
                ref_audio_orig=handle_file(data["ref_audio_orig"]),
                ref_text=data["ref_text"],
                gen_text=data["content"],
                model=data["model"],
                remove_silence=data["remove_silence"],
                cross_fade_duration=float(data["cross_fade_duration"]),
                speed=float(data["speed"]),
                api_name="/infer"
            )

        # Run the blocking request in the default executor so the event loop
        # keeps serving other coroutines while synthesis is in progress.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, _infer)

        if not result:
            return None

        # First element of the result is handed to move_file as the source
        # (presumably the generated audio file path -- confirm against the
        # F5-TTS /infer output).
        voice_tmp_path = result[0]
        # Build the output name once so the destination path and the rename
        # argument are guaranteed to carry the same timestamp.
        out_name = 'f5_tts_' + self.common.get_bj_time(4)
        return self.common.move_file(
            voice_tmp_path,
            os.path.join(self.audio_out_path, out_name),
            out_name,
        )

    except Exception as e:
        logger.error(traceback.format_exc())
        logger.error(f'F5-TTS未知错误,请检查您的F5-TTS WebUI是否启动/配置是否正确,报错内容: {e}')

        return None
update_config(config_mapping, config, config_data, type="common_config"): "fish_speech": (switch_webui_show_card_tts_fish_speech, 'bool'), "chattts": (switch_webui_show_card_tts_chattts, 'bool'), "cosyvoice": (switch_webui_show_card_tts_cosyvoice, 'bool'), + "f5_tts": (switch_webui_show_card_tts_f5_tts, 'bool'), }, "svc": { "ddsp_svc": (switch_webui_show_card_svc_ddsp_svc, 'bool'), @@ -3346,6 +3357,7 @@ def save_config(): 'fish_speech': 'fish_speech', 'chattts': 'ChatTTS', 'cosyvoice': 'CosyVoice', + 'f5_tts': 'F5-TTS', } # 聊天类型所有配置项 @@ -6091,7 +6103,36 @@ async def fish_speech_load_model(data): input_cosyvoice_api_0819_speaker = ui.input(label='说话人', value=config.get("cosyvoice", "api_0819", "speaker"), placeholder='').style("width:200px;").tooltip("自行查看") input_cosyvoice_api_0819_new = ui.input(label='new', value=config.get("cosyvoice", "api_0819", "new"), placeholder='0').style("width:200px;").tooltip("自行查看") input_cosyvoice_api_0819_speed = ui.input(label='语速', value=config.get("cosyvoice", "api_0819", "speed"), placeholder='1').style("width:200px;").tooltip("语速") - + + if config.get("webui", "show_card", "tts", "f5_tts"): + with ui.card().style(card_css): + ui.label("F5-TTS") + with ui.row(): + select_f5_tts_type = ui.select( + label='类型', + options={"gradio_1023": "gradio_1023"}, + value=config.get("f5_tts", "type") + ).style("width:150px").tooltip("对接的API类型") + input_f5_tts_gradio_ip_port = ui.input( + label='Gradio API地址', + value=config.get("f5_tts", "gradio_ip_port"), + placeholder='官方webui程序启动后gradio监听的地址', + validation={ + '请输入正确格式的URL': lambda value: common.is_url_check(value), + } + ).style("width:200px;").tooltip("对接webui的gradio接口,填webui的地址") + + select_f5_tts_model = ui.select( + label='模型', + options={'F5-TTS': 'F5-TTS', 'E2-TTS': 'E2-TTS'}, + value=config.get("f5_tts", "model") + ).style("width:100px;") + input_f5_tts_ref_audio_orig = ui.input(label='参考音频路径', value=config.get("f5_tts", "ref_audio_orig"), 
placeholder='例如:E:\\1.wav').style("width:200px;").tooltip("参考音频路径") + input_f5_tts_ref_text = ui.input(label='参考文本', value=config.get("f5_tts", "ref_text"), placeholder='音频的文本').style("width:200px;").tooltip("参考文本,例如:E:\\1.wav") + switch_f5_tts_remove_silence = ui.switch('remove_silence', value=config.get("f5_tts", "remove_silence")).style(switch_internal_css) + input_f5_tts_cross_fade_duration = ui.input(label='cross_fade_duration', value=config.get("f5_tts", "cross_fade_duration"), placeholder='0.15').style("width:100px;").tooltip("cross_fade_duration") + input_f5_tts_speed = ui.input(label='语速', value=config.get("f5_tts", "speed"), placeholder='语速').style("width:100px;").tooltip("语速,默认:1") + with ui.tab_panel(svc_page).style(tab_panel_css): if config.get("webui", "show_card", "svc", "ddsp_svc"): with ui.card().style(card_css): @@ -7228,6 +7269,7 @@ def update_echart_gift(): switch_webui_show_card_tts_fish_speech = ui.switch('fish_speech', value=config.get("webui", "show_card", "tts", "fish_speech")).style(switch_internal_css) switch_webui_show_card_tts_chattts = ui.switch('ChatTTS', value=config.get("webui", "show_card", "tts", "chattts")).style(switch_internal_css) switch_webui_show_card_tts_cosyvoice = ui.switch('CosyVoice', value=config.get("webui", "show_card", "tts", "cosyvoice")).style(switch_internal_css) + switch_webui_show_card_tts_f5_tts = ui.switch('F5-TTS', value=config.get("webui", "show_card", "tts", "f5_tts")).style(switch_internal_css) with ui.card().style(card_css): ui.label("变声")