新增：对接F5-TTS gradio API（20241023主线版本）

Ikaros-521 · Oct 23, 2024 · 5c926ef · 5c926ef
1 parent 6a1d09a
commit 5c926ef
Show file tree

Hide file tree

Showing 6 changed files with 154 additions and 4 deletions.
diff --git a/config.json b/config.json
@@ -943,6 +943,16 @@
       "streaming": 0
     }
   },
+  "f5_tts": {
+    "gradio_ip_port": "http://127.0.0.1:7860",
+    "type": "gradio_1023",
+    "ref_audio_orig": "F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav",
+    "ref_text": "整整策划了半年了，终于现在有结果了",
+    "model": "F5-TTS",
+    "remove_silence": false,
+    "cross_fade_duration": 0.15,
+    "speed": 1
+  },
   "choose_song": {
     "enable": false,
     "similarity": 0.5,
@@ -1993,7 +2003,8 @@
         "azure_tts": true,
         "fish_speech": true,
         "chattts": true,
-        "cosyvoice": true
+        "cosyvoice": true,
+        "f5_tts": true
       },
       "svc": {
         "ddsp_svc": true,

diff --git a/config.json.bak b/config.json.bak
@@ -943,6 +943,16 @@
       "streaming": 0
     }
   },
+  "f5_tts": {
+    "gradio_ip_port": "http://127.0.0.1:7860",
+    "type": "gradio_1023",
+    "ref_audio_orig": "F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav",
+    "ref_text": "整整策划了半年了，终于现在有结果了",
+    "model": "F5-TTS",
+    "remove_silence": false,
+    "cross_fade_duration": 0.15,
+    "speed": 1
+  },
   "choose_song": {
     "enable": false,
     "similarity": 0.5,
@@ -1993,7 +2003,8 @@
         "azure_tts": true,
         "fish_speech": true,
         "chattts": true,
-        "cosyvoice": true
+        "cosyvoice": true,
+        "f5_tts": true
       },
       "svc": {
         "ddsp_svc": true,

diff --git a/tests/test_gradio/f5_tts.py b/tests/test_gradio/f5_tts.py
@@ -0,0 +1,14 @@
+from gradio_client import Client, handle_file
+# Manual smoke test: call the "/infer" endpoint of the Gradio server at 127.0.0.1:7860 and print the raw result.
+client = Client("http://127.0.0.1:7860/")
+result = client.predict(
+		ref_audio_orig=handle_file('F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav'),
+		ref_text="整整策划了半年了，终于现在有结果了",
+		gen_text="你好",
+		model="F5-TTS",
+		remove_silence=False,
+		cross_fade_duration=0.15,
+		speed=1,
+		api_name="/infer"
+)
+print(result)
diff --git a/utils/audio.py b/utils/audio.py
@@ -1097,7 +1097,7 @@ async def tts_handle(self, message):
 
                 voice_tmp_path = await self.my_tts.chattts_api(data)  
             elif message["tts_type"] == "cosyvoice":
-                logger.info(message)
+                logger.debug(message)
                 data = {
                     "type": message["data"]["type"],
                     "gradio_ip_port": message["data"]["gradio_ip_port"],
@@ -1108,7 +1108,21 @@ async def tts_handle(self, message):
                 }
 
                 voice_tmp_path = await self.my_tts.cosyvoice_api(data)  
+            elif message["tts_type"] == "f5_tts":
+                logger.debug(message)
+                data = {
+                    "type": message["data"]["type"],
+                    "gradio_ip_port": message["data"]["gradio_ip_port"],
+                    "ref_audio_orig": message["data"]["ref_audio_orig"],
+                    "ref_text": message["data"]["ref_text"],
+                    "model": message["data"]["model"],
+                    "remove_silence": message["data"]["remove_silence"],
+                    "cross_fade_duration": message["data"]["cross_fade_duration"],
+                    "speed": message["data"]["speed"],
+                    "content": message["content"],
+                }
 
+                voice_tmp_path = await self.my_tts.f5_tts_api(data)  
             elif message["tts_type"] == "none":
                 # Audio.voice_tmp_path_queue.put(message)
                 voice_tmp_path = None
@@ -2088,6 +2102,22 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type="
             }
             # 调用接口合成语音
             voice_tmp_path = await self.my_tts.cosyvoice_api(data)
+        elif audio_synthesis_type == "f5_tts":
+            data = {
+                "type": self.config.get("f5_tts", "type"),
+                "gradio_ip_port": self.config.get("f5_tts", "gradio_ip_port"),
+                "ref_audio_orig": self.config.get("f5_tts", "ref_audio_orig"),
+                "ref_text": self.config.get("f5_tts", "ref_text"),
+                "model": self.config.get("f5_tts", "model"),
+                "remove_silence": self.config.get("f5_tts", "remove_silence"),
+                "cross_fade_duration": self.config.get("f5_tts", "cross_fade_duration"),
+                "speed": self.config.get("f5_tts", "speed"),
+                "content": content
+            }
+            # 调用接口合成语音
+            voice_tmp_path = await self.my_tts.f5_tts_api(data)
+
+
 
         return voice_tmp_path
 

diff --git a/utils/audio_handle/my_tts.py b/utils/audio_handle/my_tts.py
@@ -1321,3 +1321,45 @@ async def cosyvoice_api(self, data):
 
         return None
 
+    # F5-TTS (does not work with gradio_client 0.16.4 -- that version is too old)
+    async def f5_tts_api(self, data):
+        """Synthesize speech through the F5-TTS Gradio API.
+
+        Args:
+            data (dict): Request parameters. Reads "type", "gradio_ip_port", "ref_audio_orig", "ref_text", "content", "model", "remove_silence", "cross_fade_duration" and "speed".
+
+        Returns:
+            str: Value returned by self.common.move_file for the generated audio, or None when the type is not "gradio_1023", the result is empty, or an error was logged.
+        """
+        try:
+            if data["type"] == "gradio_1023":  # the only API type handled here
+                from gradio_client import Client, handle_file
+
+                client = Client(data["gradio_ip_port"])
+
+                result = client.predict(  # NOTE(review): looks like a synchronous call inside an async method -- confirm it does not stall the event loop
+                    ref_audio_orig=handle_file(data["ref_audio_orig"]),
+                    ref_text=data["ref_text"],
+                    gen_text=data["content"],
+                    model=data["model"],
+                    remove_silence=data["remove_silence"],
+                    cross_fade_duration=float(data["cross_fade_duration"]),  # presumably may arrive as a string from the config UI, hence float() -- verify against caller
+                    speed=float(data["speed"]),
+                    api_name="/infer"
+                )
+
+                new_file_path = None
+
+                if result:
+                    voice_tmp_path = result[0]  # first element of the result is used as the generated audio file path
+                    new_file_path = self.common.move_file(voice_tmp_path, os.path.join(self.audio_out_path, 'f5_tts_' + self.common.get_bj_time(4)), 'f5_tts_' + self.common.get_bj_time(4))  # NOTE(review): get_bj_time(4) is evaluated twice here; confirm both calls yield the same value
+
+                return new_file_path
+
+        except Exception as e:  # best-effort: log the failure and fall through to return None
+            logger.error(traceback.format_exc())
+            logger.error(f'F5-TTS未知错误，请检查您的F5-TTS WebUI是否启动/配置是否正确，报错内容: {e}')
+
+        return None
+
+
diff --git a/webui.py b/webui.py
@@ -2769,6 +2769,16 @@ def update_config(config_mapping, config, config_data, type="common_config"):
                             "speed": (input_cosyvoice_api_0819_speed, 'float'),
                         },
                     },
+                    "f5_tts": {
+                        "type": (select_f5_tts_type, 'str'),
+                        "gradio_ip_port": (input_f5_tts_gradio_ip_port, 'str'),
+                        "ref_audio_orig": (input_f5_tts_ref_audio_orig, 'str'),
+                        "ref_text": (input_f5_tts_ref_text, 'str'),
+                        "model": (select_f5_tts_model, 'str'),
+                        "remove_silence": (switch_f5_tts_remove_silence, 'bool'),
+                        "cross_fade_duration": (input_f5_tts_cross_fade_duration, 'float'),
+                        "speed": (input_f5_tts_speed, 'float'),
+                    },
                 }
                 config_data = update_config(config_mapping, config, config_data, "tts")
 
@@ -3202,6 +3212,7 @@ def update_config(config_mapping, config, config_data, type="common_config"):
                                 "fish_speech": (switch_webui_show_card_tts_fish_speech, 'bool'),
                                 "chattts": (switch_webui_show_card_tts_chattts, 'bool'),
                                 "cosyvoice": (switch_webui_show_card_tts_cosyvoice, 'bool'),
+                                "f5_tts": (switch_webui_show_card_tts_f5_tts, 'bool'),
                             },
                             "svc": {
                                 "ddsp_svc": (switch_webui_show_card_svc_ddsp_svc, 'bool'),
@@ -3346,6 +3357,7 @@ def save_config():
         'fish_speech': 'fish_speech',
         'chattts': 'ChatTTS',
         'cosyvoice': 'CosyVoice',
+        'f5_tts': 'F5-TTS',
     }
 
     # 聊天类型所有配置项
@@ -6091,7 +6103,36 @@ async def fish_speech_load_model(data):
                                 input_cosyvoice_api_0819_speaker = ui.input(label='说话人', value=config.get("cosyvoice", "api_0819", "speaker"), placeholder='').style("width:200px;").tooltip("自行查看")
                                 input_cosyvoice_api_0819_new = ui.input(label='new', value=config.get("cosyvoice", "api_0819", "new"), placeholder='0').style("width:200px;").tooltip("自行查看")
                                 input_cosyvoice_api_0819_speed = ui.input(label='语速', value=config.get("cosyvoice", "api_0819", "speed"), placeholder='1').style("width:200px;").tooltip("语速")
-
+
+            if config.get("webui", "show_card", "tts", "f5_tts"): 
+                with ui.card().style(card_css):
+                    ui.label("F5-TTS")
+                    with ui.row():
+                        select_f5_tts_type = ui.select(
+                            label='类型', 
+                            options={"gradio_1023": "gradio_1023"}, 
+                            value=config.get("f5_tts", "type")
+                        ).style("width:150px").tooltip("对接的API类型")
+                        input_f5_tts_gradio_ip_port = ui.input(
+                            label='Gradio API地址', 
+                            value=config.get("f5_tts", "gradio_ip_port"), 
+                            placeholder='官方webui程序启动后gradio监听的地址',
+                            validation={
+                                '请输入正确格式的URL': lambda value: common.is_url_check(value),
+                            }
+                        ).style("width:200px;").tooltip("对接webui的gradio接口，填webui的地址")
+
+                        select_f5_tts_model = ui.select(
+                            label='模型', 
+                            options={'F5-TTS': 'F5-TTS', 'E2-TTS': 'E2-TTS'}, 
+                            value=config.get("f5_tts", "model")
+                        ).style("width:100px;")
+                        input_f5_tts_ref_audio_orig = ui.input(label='参考音频路径', value=config.get("f5_tts", "ref_audio_orig"), placeholder='例如：E:\\1.wav').style("width:200px;").tooltip("参考音频路径")
+                        input_f5_tts_ref_text = ui.input(label='参考文本', value=config.get("f5_tts", "ref_text"), placeholder='音频的文本').style("width:200px;").tooltip("参考音频对应的文本内容")  # tooltip previously showed a file-path example copied from the audio-path field
+                        switch_f5_tts_remove_silence = ui.switch('remove_silence', value=config.get("f5_tts", "remove_silence")).style(switch_internal_css)
+                        input_f5_tts_cross_fade_duration = ui.input(label='cross_fade_duration', value=config.get("f5_tts", "cross_fade_duration"), placeholder='0.15').style("width:100px;").tooltip("cross_fade_duration")
+                        input_f5_tts_speed = ui.input(label='语速', value=config.get("f5_tts", "speed"), placeholder='语速').style("width:100px;").tooltip("语速，默认：1")
+
         with ui.tab_panel(svc_page).style(tab_panel_css):
             if config.get("webui", "show_card", "svc", "ddsp_svc"):
                 with ui.card().style(card_css):
@@ -7228,6 +7269,7 @@ def update_echart_gift():
                         switch_webui_show_card_tts_fish_speech = ui.switch('fish_speech', value=config.get("webui", "show_card", "tts", "fish_speech")).style(switch_internal_css)
                         switch_webui_show_card_tts_chattts = ui.switch('ChatTTS', value=config.get("webui", "show_card", "tts", "chattts")).style(switch_internal_css)
                         switch_webui_show_card_tts_cosyvoice = ui.switch('CosyVoice', value=config.get("webui", "show_card", "tts", "cosyvoice")).style(switch_internal_css)
+                        switch_webui_show_card_tts_f5_tts = ui.switch('F5-TTS', value=config.get("webui", "show_card", "tts", "f5_tts")).style(switch_internal_css)
 
                 with ui.card().style(card_css):
                     ui.label("变声")