From 72a48b30e8a2f70181d849613281f7d5acda8501 Mon Sep 17 00:00:00 2001 From: ikaros <327209194@qq.com> Date: Sat, 6 Apr 2024 19:50:06 +0800 Subject: [PATCH] =?UTF-8?q?gpt-sovits=E6=96=B0=E5=A2=9E=E5=AF=B9=E5=88=98?= =?UTF-8?q?=E6=82=A6=E4=BD=AC0322=E6=95=B4=E5=90=88=E5=8C=85=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E7=9A=84=E5=85=BC=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.json | 15 +++++ config.json.bak | 15 +++++ utils/audio.py | 2 + utils/audio_handle/my_tts.py | 39 ++++++++++++ webui.py | 119 +++++++++++++++++++++++++++-------- 5 files changed, 163 insertions(+), 27 deletions(-) diff --git a/config.json b/config.json index 9e38cfc9..f331257e 100644 --- a/config.json +++ b/config.json @@ -604,6 +604,21 @@ "cut": "凑四句一切", "gpt_model_path": "F:\\GPT-SoVITS\\GPT_weights\\ikaros-e15.ckpt", "sovits_model_path": "F:\\GPT-SoVITS\\SoVITS_weights\\ikaros_e8_s280.pth", + "api_0322": { + "text_lang": "中英混合", + "ref_audio_path": "F:\\GPT-SoVITS\\raws\\ikaros\\21.wav", + "prompt_text": "マスター、どうりょくろか、いいえ、なんでもありません", + "prompt_lang": "日文", + "top_k": 1, + "top_p": 0.8, + "temperature": 0.8, + "text_split_method": "按标点符号切", + "batch_size": 20, + "speed_factor": 1.0, + "split_bucket": false, + "return_fragment": false, + "fragment_interval": "0.3" + }, "webtts": { "api_ip_port": "http://127.0.0.1:8080", "spk": "sanyueqi", diff --git a/config.json.bak b/config.json.bak index 9e38cfc9..f331257e 100644 --- a/config.json.bak +++ b/config.json.bak @@ -604,6 +604,21 @@ "cut": "凑四句一切", "gpt_model_path": "F:\\GPT-SoVITS\\GPT_weights\\ikaros-e15.ckpt", "sovits_model_path": "F:\\GPT-SoVITS\\SoVITS_weights\\ikaros_e8_s280.pth", + "api_0322": { + "text_lang": "中英混合", + "ref_audio_path": "F:\\GPT-SoVITS\\raws\\ikaros\\21.wav", + "prompt_text": "マスター、どうりょくろか、いいえ、なんでもありません", + "prompt_lang": "日文", + "top_k": 1, + "top_p": 0.8, + "temperature": 0.8, + "text_split_method": "按标点符号切", + "batch_size": 20, + "speed_factor": 1.0, + "split_bucket": false, + "return_fragment": false, + "fragment_interval": "0.3" + }, "webtts": { "api_ip_port": "http://127.0.0.1:8080", "spk": "sanyueqi", diff --git a/utils/audio.py b/utils/audio.py index 10249f16..77ef1c32 100644 --- a/utils/audio.py +++ b/utils/audio.py @@ -795,6 +795,7 @@ async def tts_handle(self, message): "prompt_language": message["data"]["prompt_language"], "language": language, "cut": message["data"]["cut"], + "api_0322": message["data"]["api_0322"], "webtts": message["data"]["webtts"], "content": message["content"] } @@ -1631,6 +1632,7 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type=" "prompt_language": self.config.get("gpt_sovits", "prompt_language"), "language": language, "cut": self.config.get("gpt_sovits", "cut"), + "api_0322": self.config.get("gpt_sovits", "api_0322"), "webtts": self.config.get("gpt_sovits", "webtts"), "content": content } diff --git a/utils/audio_handle/my_tts.py b/utils/audio_handle/my_tts.py index 57e27351..319df7c5 100644 --- a/utils/audio_handle/my_tts.py +++ b/utils/audio_handle/my_tts.py @@ -707,6 +707,45 @@ async def websocket_client_logic(websocket, data_json): except Exception as e: logging.error(traceback.format_exc()) logging.error(f'gpt_sovits未知错误: {e}') + elif data["type"] == "api_0322": + try: + + data_json = { + "text": data["content"], + "text_lang": data["api_0322"]["text_lang"], + "ref_audio_path": data["api_0322"]["ref_audio_path"], + "prompt_text": data["api_0322"]["prompt_text"], + "prompt_lang": data["api_0322"]["prompt_lang"], + "top_k": data["api_0322"]["top_k"], + "top_p": data["api_0322"]["top_p"], + "temperature": data["api_0322"]["temperature"], + "text_split_method": data["api_0322"]["text_split_method"], + "batch_size":int(data["api_0322"]["batch_size"]), + "speed_factor":float(data["api_0322"]["speed_factor"]), + "split_bucket":data["api_0322"]["split_bucket"], + "return_fragment":data["api_0322"]["return_fragment"], + "fragment_interval":data["api_0322"]["fragment_interval"], + } + + async with aiohttp.ClientSession() as session: + async with session.post(data["api_ip_port"], json=data_json, timeout=self.timeout) as response: + response = await response.read() + + file_name = 'gpt_sovits_' + self.common.get_bj_time(4) + '.wav' + + voice_tmp_path = self.common.get_new_audio_path(self.audio_out_path, file_name) + + with open(voice_tmp_path, 'wb') as f: + f.write(response) + + return voice_tmp_path + except aiohttp.ClientError as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits请求失败: {e}') + except Exception as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits未知错误: {e}') + elif data["type"] == "webtts": try: # 使用字典推导式构建 params 字典,只包含非空字符串的值 diff --git a/webui.py b/webui.py index f6591800..b6e107e9 100644 --- a/webui.py +++ b/webui.py @@ -1928,6 +1928,21 @@ def common_textarea_handle(content): config_data["gpt_sovits"]["cut"] = select_gpt_sovits_cut.value config_data["gpt_sovits"]["gpt_model_path"] = input_gpt_sovits_gpt_model_path.value config_data["gpt_sovits"]["sovits_model_path"] = input_gpt_sovits_sovits_model_path.value + + config_data["gpt_sovits"]["api_0322"]["ref_audio_path"] = input_gpt_sovits_api_0322_ref_audio_path.value + config_data["gpt_sovits"]["api_0322"]["prompt_text"] = input_gpt_sovits_api_0322_prompt_text.value + config_data["gpt_sovits"]["api_0322"]["prompt_lang"] = select_gpt_sovits_api_0322_prompt_lang.value + config_data["gpt_sovits"]["api_0322"]["text_lang"] = select_gpt_sovits_api_0322_text_lang.value + config_data["gpt_sovits"]["api_0322"]["text_split_method"] = select_gpt_sovits_api_0322_text_split_method.value + config_data["gpt_sovits"]["api_0322"]["top_k"] = int(input_gpt_sovits_api_0322_top_k.value) + config_data["gpt_sovits"]["api_0322"]["top_p"] = round(float(input_gpt_sovits_api_0322_top_p.value), 2) + config_data["gpt_sovits"]["api_0322"]["temperature"] = round(float(input_gpt_sovits_api_0322_temperature.value), 2) + config_data["gpt_sovits"]["api_0322"]["batch_size"] = int(input_gpt_sovits_api_0322_batch_size.value) + config_data["gpt_sovits"]["api_0322"]["speed_factor"] = round(float(input_gpt_sovits_api_0322_speed_factor.value), 2) + config_data["gpt_sovits"]["api_0322"]["fragment_interval"] = input_gpt_sovits_api_0322_fragment_interval.value + config_data["gpt_sovits"]["api_0322"]["split_bucket"] = switch_gpt_sovits_api_0322_split_bucket.value + config_data["gpt_sovits"]["api_0322"]["return_fragment"] = switch_gpt_sovits_api_0322_return_fragment.value + config_data["gpt_sovits"]["webtts"]["spk"] = input_gpt_sovits_webtts_spk.value config_data["gpt_sovits"]["webtts"]["lang"] = select_gpt_sovits_webtts_lang.value config_data["gpt_sovits"]["webtts"]["speed"] = input_gpt_sovits_webtts_speed.value @@ -3942,41 +3957,91 @@ def vits_get_speaker_id(): with ui.row(): select_gpt_sovits_type = ui.select( label='API类型', - options={'gradio':'gradio', 'api':'api', 'webtts':'WebTTS'}, + options={'gradio':'gradio旧版', 'api':'api', 'api_0322':'api_0322', 'webtts':'WebTTS'}, value=config.get("gpt_sovits", "type") ).style("width:100px;") input_gpt_sovits_ws_ip_port = ui.input(label='WS地址(gradio)', value=config.get("gpt_sovits", "ws_ip_port"), placeholder='启动TTS推理后,ws的接口地址').style("width:200px;") input_gpt_sovits_api_ip_port = ui.input(label='API地址(http)', value=config.get("gpt_sovits", "api_ip_port"), placeholder='官方API程序启动后监听的地址').style("width:200px;") - with ui.row(): - input_gpt_sovits_ref_audio_path = ui.input(label='参考音频路径', value=config.get("gpt_sovits", "ref_audio_path"), placeholder='参考音频路径,建议填绝对路径').style("width:300px;") - input_gpt_sovits_prompt_text = ui.input(label='参考音频的文本', value=config.get("gpt_sovits", "prompt_text"), placeholder='参考音频的文本').style("width:200px;") - select_gpt_sovits_prompt_language = ui.select( - label='参考音频的语种', - options={'中文':'中文', '日文':'日文', '英文':'英文'}, - value=config.get("gpt_sovits", "prompt_language") - ).style("width:150px;") - select_gpt_sovits_language = ui.select( - label='需要合成的语种', - options={'自动识别':'自动识别', '中文':'中文', '日文':'日文', '英文':'英文'}, - value=config.get("gpt_sovits", "language") - ).style("width:150px;") - select_gpt_sovits_cut = ui.select( - label='语句切分', - options={ - '不切':'不切', - '凑四句一切':'凑四句一切', - '凑50字一切':'凑50字一切', - '按中文句号。切':'按中文句号。切', - '按英文句号.切':'按英文句号.切', - '按标点符号切':'按标点符号切' - }, - value=config.get("gpt_sovits", "cut") - ).style("width:200px;") + with ui.row(): input_gpt_sovits_gpt_model_path = ui.input(label='GPT模型路径', value=config.get("gpt_sovits", "gpt_model_path"), placeholder='GPT模型路径,填绝对路径').style("width:300px;") input_gpt_sovits_sovits_model_path = ui.input(label='SOVITS模型路径', value=config.get("gpt_sovits", "sovits_model_path"), placeholder='SOVITS模型路径,填绝对路径').style("width:300px;") button_gpt_sovits_set_model = ui.button('加载模型', on_click=gpt_sovits_set_model, color=button_internal_color).style(button_internal_css) - + + with ui.card().style(card_css): + ui.label("api") + with ui.row(): + input_gpt_sovits_ref_audio_path = ui.input(label='参考音频路径', value=config.get("gpt_sovits", "ref_audio_path"), placeholder='参考音频路径,建议填绝对路径').style("width:300px;") + input_gpt_sovits_prompt_text = ui.input(label='参考音频的文本', value=config.get("gpt_sovits", "prompt_text"), placeholder='参考音频的文本').style("width:200px;") + select_gpt_sovits_prompt_language = ui.select( + label='参考音频的语种', + options={'中文':'中文', '日文':'日文', '英文':'英文'}, + value=config.get("gpt_sovits", "prompt_language") + ).style("width:150px;") + select_gpt_sovits_language = ui.select( + label='需要合成的语种', + options={'自动识别':'自动识别', '中文':'中文', '日文':'日文', '英文':'英文'}, + value=config.get("gpt_sovits", "language") + ).style("width:150px;") + select_gpt_sovits_cut = ui.select( + label='语句切分', + options={ + '不切':'不切', + '凑四句一切':'凑四句一切', + '凑50字一切':'凑50字一切', + '按中文句号。切':'按中文句号。切', + '按英文句号.切':'按英文句号.切', + '按标点符号切':'按标点符号切' + }, + value=config.get("gpt_sovits", "cut") + ).style("width:200px;") + + with ui.card().style(card_css): + ui.label("api_0322") + with ui.row(): + input_gpt_sovits_api_0322_ref_audio_path = ui.input(label='参考音频路径', value=config.get("gpt_sovits", "api_0322", "ref_audio_path"), placeholder='参考音频路径,建议填绝对路径').style("width:300px;") + input_gpt_sovits_api_0322_prompt_text = ui.input(label='参考音频的文本', value=config.get("gpt_sovits", "api_0322", "prompt_text"), placeholder='参考音频的文本').style("width:200px;") + select_gpt_sovits_api_0322_prompt_lang = ui.select( + label='参考音频的语种', + options={'中文':'中文', '日文':'日文', '英文':'英文'}, + value=config.get("gpt_sovits", "api_0322", "prompt_lang") + ).style("width:150px;") + select_gpt_sovits_api_0322_text_lang = ui.select( + label='需要合成的语种', + options={ + '自动识别':'自动识别', + '中文':'中文', + '日文':'日文', + '英文':'英文', + '中英混合': '中英混合', + '日英混合': '日英混合', + '多语种混合': '多语种混合', + }, + value=config.get("gpt_sovits", "api_0322", "text_lang") + ).style("width:150px;") + select_gpt_sovits_api_0322_text_split_method = ui.select( + label='语句切分', + options={ + '不切':'不切', + '凑四句一切':'凑四句一切', + '凑50字一切':'凑50字一切', + '按中文句号。切':'按中文句号。切', + '按英文句号.切':'按英文句号.切', + '按标点符号切':'按标点符号切' + }, + value=config.get("gpt_sovits", "api_0322", "text_split_method") + ).style("width:200px;") + with ui.row(): + input_gpt_sovits_api_0322_top_k = ui.input(label='top_k', value=config.get("gpt_sovits", "api_0322", "top_k"), placeholder='top_k').style("width:100px;") + input_gpt_sovits_api_0322_top_p = ui.input(label='top_p', value=config.get("gpt_sovits", "api_0322", "top_p"), placeholder='top_p').style("width:100px;") + input_gpt_sovits_api_0322_temperature = ui.input(label='temperature', value=config.get("gpt_sovits", "api_0322", "temperature"), placeholder='temperature').style("width:100px;") + input_gpt_sovits_api_0322_batch_size = ui.input(label='batch_size', value=config.get("gpt_sovits", "api_0322", "batch_size"), placeholder='batch_size').style("width:100px;") + input_gpt_sovits_api_0322_speed_factor = ui.input(label='speed_factor', value=config.get("gpt_sovits", "api_0322", "speed_factor"), placeholder='speed_factor').style("width:100px;") + input_gpt_sovits_api_0322_fragment_interval = ui.input(label='分段间隔(秒)', value=config.get("gpt_sovits", "api_0322", "fragment_interval"), placeholder='fragment_interval').style("width:100px;") + switch_gpt_sovits_api_0322_split_bucket = ui.switch('split_bucket', value=config.get("gpt_sovits", "api_0322", "split_bucket")).style(switch_internal_css) + switch_gpt_sovits_api_0322_return_fragment = ui.switch('return_fragment', value=config.get("gpt_sovits", "api_0322", "return_fragment")).style(switch_internal_css) + + with ui.card().style(card_css): ui.label("WebTTS相关配置") with ui.row():