From cd088d6121fa9fd841ecbfada6bad73144f22d34 Mon Sep 17 00:00:00 2001 From: ikaros <327209194@qq.com> Date: Sun, 21 Jul 2024 14:59:40 +0800 Subject: [PATCH] =?UTF-8?q?=E5=85=BC=E5=AE=B9gpt=20sovits=E5=AE=98?= =?UTF-8?q?=E6=96=B9=E6=95=B4=E5=90=88=E5=8C=850706?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + config.json | 7 + config.json.bak | 7 + tests/test_gpt_sovits/api_0706.py | 229 ++++++++++++++++++++++++++++++ utils/audio.py | 5 + utils/audio_handle/my_tts.py | 19 +++ webui.py | 35 ++++- 7 files changed, 301 insertions(+), 2 deletions(-) create mode 100644 tests/test_gpt_sovits/api_0706.py diff --git a/README.md b/README.md index 2560fcb0..381a94c7 100644 --- a/README.md +++ b/README.md @@ -83,4 +83,5 @@ AIHubMix: [aihubmix.com](https://aihubmix.com/register?aff=1BMI) ———— O | 用户信息 | 名人名言 | |--------|------| | QQ:750359376 | 笑死,连点开源精神都没有 | +| QQ:378198682 | 【散播谣言】 | diff --git a/config.json b/config.json index 236798a2..7414bbf7 100644 --- a/config.json +++ b/config.json @@ -767,6 +767,13 @@ "return_fragment": false, "fragment_interval": "0.3" }, + "api_0706": { + "refer_wav_path": "F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav", + "text_language": "中文", + "prompt_text": "整整策划了半年了,终于现在有结果了", + "prompt_language": "中文", + "cut_punc": ",。" + }, "webtts": { "version": "1", "api_ip_port": "http://127.0.0.1:8080", diff --git a/config.json.bak b/config.json.bak index 236798a2..7414bbf7 100644 --- a/config.json.bak +++ b/config.json.bak @@ -767,6 +767,13 @@ "return_fragment": false, "fragment_interval": "0.3" }, + "api_0706": { + "refer_wav_path": "F:\\GPT-SoVITS-0304\\output\\slicer_opt\\smoke1.wav", + "text_language": "中文", + "prompt_text": "整整策划了半年了,终于现在有结果了", + "prompt_language": "中文", + "cut_punc": ",。" + }, "webtts": { "version": "1", "api_ip_port": "http://127.0.0.1:8080", diff --git a/tests/test_gpt_sovits/api_0706.py b/tests/test_gpt_sovits/api_0706.py new file mode 100644 index 00000000..d8790e9d --- /dev/null +++ b/tests/test_gpt_sovits/api_0706.py @@ -0,0 +1,229 @@ +import logging, json, aiohttp, os, traceback +import base64 +import mimetypes +import websockets +import asyncio + +async def gpt_sovits_api(data): + + + def file_to_data_url(file_path): + # 根据文件扩展名确定 MIME 类型 + mime_type, _ = mimetypes.guess_type(file_path) + + # 读取文件内容 + with open(file_path, "rb") as file: + file_content = file.read() + + # 转换为 Base64 编码 + base64_encoded_data = base64.b64encode(file_content).decode('utf-8') + + # 构造完整的 Data URL + return f"data:{mime_type};base64,{base64_encoded_data}" + + async def websocket_client(data_json): + try: + async with websockets.connect(data["ws_ip_port"]) as websocket: + # 设置最大连接时长(例如 30 秒) + return await asyncio.wait_for(websocket_client_logic(websocket, data_json), timeout=30) + except asyncio.TimeoutError: + logging.error("gpt_sovits WebSocket连接超时") + return None + + async def websocket_client_logic(websocket, data_json): + async for message in websocket: + logging.debug(f"Received message: {message}") + + # 解析收到的消息 + data = json.loads(message) + # 检查是否是预期的消息 + if "msg" in data: + if data["msg"] == "send_hash": + # 发送响应消息 + response = json.dumps({"session_hash":"3obpzfqql7f","fn_index":3}) + await websocket.send(response) + logging.debug(f"Sent message: {response}") + elif data["msg"] == "send_data": + # audio_path = "F:\\GPT-SoVITS\\raws\\ikaros\\1.wav" + audio_path = data_json["ref_audio_path"] + + # 发送响应消息 + response = json.dumps( + { + "session_hash":"3obpzfqql7f", + "fn_index":3, + "data":[ + { + "data": file_to_data_url(audio_path), + "name": os.path.basename(audio_path) + }, + data_json["prompt_text"], + data_json["prompt_language"], + data_json["content"], + data_json["language"], + data_json["cut"] + ] + } + ) + await websocket.send(response) + logging.debug(f"Sent message: {response}") + elif data["msg"] == "process_completed": + return data["output"]["data"][0]["name"] + + try: + logging.debug(f"data={data}") + + if data["type"] == "gradio": + # 调用函数并等待结果 + voice_tmp_path = await websocket_client(data) + + # if voice_tmp_path: + # new_file_path = self.common.move_file(voice_tmp_path, os.path.join(self.audio_out_path, 'gpt_sovits_' + self.common.get_bj_time(4)), 'gpt_sovits_' + self.common.get_bj_time(4)) + + new_file_path = 'gpt_sovits_.wav' + + return new_file_path + elif data["type"] == "api": + try: + data_json = { + "refer_wav_path": data["ref_audio_path"], + "prompt_text": data["prompt_text"], + "prompt_language": data["prompt_language"], + "text": data["content"], + "text_language": data["language"] + } + + async with aiohttp.ClientSession() as session: + async with session.post(data["api_ip_port"], json=data_json, timeout=30) as response: + response = await response.read() + + file_name = 'gpt_sovits_.wav' + + voice_tmp_path = file_name + + # voice_tmp_path = self.common.get_new_audio_path(self.audio_out_path, file_name) + + with open(voice_tmp_path, 'wb') as f: + f.write(response) + + return voice_tmp_path + except aiohttp.ClientError as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits请求失败: {e}') + except Exception as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits未知错误: {e}') + elif data["type"] == "webtts": + try: + # 使用字典推导式构建 params 字典,只包含非空字符串的值 + params = { + key: value + for key, value in data["webtts"].items() + if value != "" + if key != "api_ip_port" + } + + # params["speed"] = self.get_random_float(params["speed"]) + params["text"] = data["content"] + + async with aiohttp.ClientSession() as session: + async with session.get(data["webtts"]["api_ip_port"], params=params, timeout=30) as response: + response = await response.read() + + file_name = 'gpt_sovits_.wav' + + voice_tmp_path = file_name + + # voice_tmp_path = self.common.get_new_audio_path(self.audio_out_path, file_name) + + with open(voice_tmp_path, 'wb') as f: + f.write(response) + + return voice_tmp_path + except aiohttp.ClientError as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits请求失败: {e}') + except Exception as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits未知错误: {e}') + elif data["type"] == "api_0706": + try: + data_json = { + "refer_wav_path": data["ref_audio_path"], + "prompt_text": data["prompt_text"], + "prompt_language": data["prompt_language"], + "text": data["content"], + "text_language": data["language"] + } + + async with aiohttp.ClientSession() as session: + async with session.post(data["api_ip_port"], json=data_json, timeout=30) as response: + response = await response.read() + + file_name = 'gpt_sovits_.wav' + + voice_tmp_path = file_name + + # voice_tmp_path = self.common.get_new_audio_path(self.audio_out_path, file_name) + + with open(voice_tmp_path, 'wb') as f: + f.write(response) + + return voice_tmp_path + except aiohttp.ClientError as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits请求失败: {e}') + except Exception as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits未知错误: {e}') + + except Exception as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits未知错误,请检查您的gpt_sovits推理是否启动/配置是否正确,报错内容: {e}') + + return None + + +async def gpt_sovits_set_model(data): + from urllib.parse import urljoin + + if data["type"] == "api": + try: + data_json = { + "gpt_model_path": data["gpt_model_path"], + "sovits_model_path": data["sovits_model_path"] + } + + API_URL = urljoin(data["api_ip_port"], '/set_model') + + async with aiohttp.ClientSession() as session: + async with session.post(API_URL, json=data_json, timeout=30) as response: + response = await response.read() + + print(response) + + return response + except aiohttp.ClientError as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits请求失败: {e}') + except Exception as e: + logging.error(traceback.format_exc()) + logging.error(f'gpt_sovits未知错误: {e}') + + +if __name__ == '__main__': + # 配置日志输出格式 + logging.basicConfig( + level=logging.DEBUG, # 设置日志级别,可以根据需求调整 + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + data = { + "type": "api", + "api_ip_port": "http://127.0.0.1:9880", + "gpt_model_path": "F:\GPT-SoVITS\GPT_weights\ikaros-e15.ckpt", + "sovits_model_path": "F:\GPT-SoVITS\SoVITS_weights\ikaros_e8_s280.pth" + } + + asyncio.run(gpt_sovits_set_model(data)) diff --git a/utils/audio.py b/utils/audio.py index 62f08092..9c663e3e 100644 --- a/utils/audio.py +++ b/utils/audio.py @@ -1008,6 +1008,9 @@ async def tts_handle(self, message): else: message["data"]["api_0322"]["text_lang"] = "中文" # 无法识别出语言代码时的默认值 + if message["data"]["api_0706"]["text_language"] == "自动识别": + message["data"]["api_0706"]["text_language"] = "auto" + data = { "type": message["data"]["type"], "gradio_ip_port": message["data"]["gradio_ip_port"], @@ -1019,6 +1022,7 @@ async def tts_handle(self, message): "language": language, "cut": message["data"]["cut"], "api_0322": message["data"]["api_0322"], + "api_0706": message["data"]["api_0706"], "webtts": message["data"]["webtts"], "content": message["content"] } @@ -1956,6 +1960,7 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type=" "language": language, "cut": self.config.get("gpt_sovits", "cut"), "api_0322": self.config.get("gpt_sovits", "api_0322"), + "api_0706": self.config.get("gpt_sovits", "api_0706"), "webtts": self.config.get("gpt_sovits", "webtts"), "content": content } diff --git a/utils/audio_handle/my_tts.py b/utils/audio_handle/my_tts.py index 25bcfcb6..a90202da 100644 --- a/utils/audio_handle/my_tts.py +++ b/utils/audio_handle/my_tts.py @@ -751,6 +751,25 @@ async def websocket_client_logic(websocket, data_json): "fragment_interval":data["api_0322"]["fragment_interval"], } + return await self.download_audio("gpt_sovits", data["api_ip_port"], self.timeout, "post", None, data_json) + except aiohttp.ClientError as e: + logger.error(traceback.format_exc()) + logger.error(f'gpt_sovits请求失败: {e}') + except Exception as e: + logger.error(traceback.format_exc()) + logger.error(f'gpt_sovits未知错误: {e}') + elif data["type"] == "api_0706": + try: + + data_json = { + "text": data["content"], + "refer_wav_path": data["api_0706"]["refer_wav_path"], + "text_language": data["api_0706"]["text_language"], + "prompt_text": data["api_0706"]["prompt_text"], + "prompt_language": data["api_0706"]["prompt_language"], + "cut_punc": data["api_0706"]["cut_punc"], + } + return await self.download_audio("gpt_sovits", data["api_ip_port"], self.timeout, "post", None, data_json) except aiohttp.ClientError as e: logger.error(traceback.format_exc()) diff --git a/webui.py b/webui.py index 1130bd6a..adb6e9e7 100644 --- a/webui.py +++ b/webui.py @@ -2287,6 +2287,12 @@ def common_textarea_handle(content): config_data["gpt_sovits"]["api_0322"]["split_bucket"] = switch_gpt_sovits_api_0322_split_bucket.value config_data["gpt_sovits"]["api_0322"]["return_fragment"] = switch_gpt_sovits_api_0322_return_fragment.value + config_data["gpt_sovits"]["api_0706"]["refer_wav_path"] = input_gpt_sovits_api_0706_refer_wav_path.value + config_data["gpt_sovits"]["api_0706"]["prompt_text"] = input_gpt_sovits_api_0706_prompt_text.value + config_data["gpt_sovits"]["api_0706"]["prompt_language"] = select_gpt_sovits_api_0706_prompt_language.value + config_data["gpt_sovits"]["api_0706"]["text_language"] = select_gpt_sovits_api_0706_text_language.value + config_data["gpt_sovits"]["api_0706"]["cut_punc"] = input_gpt_sovits_api_0706_cut_punc.value + config_data["gpt_sovits"]["webtts"]["version"] = select_gpt_sovits_webtts_version.value config_data["gpt_sovits"]["webtts"]["api_ip_port"] = input_gpt_sovits_webtts_api_ip_port.value config_data["gpt_sovits"]["webtts"]["spk"] = input_gpt_sovits_webtts_spk.value @@ -4987,7 +4993,7 @@ def vits_get_speaker_id(): with ui.row(): select_gpt_sovits_type = ui.select( label='API类型', - options={'gradio':'gradio旧版', 'gradio_0322':'gradio_0322', 'api':'api', 'api_0322':'api_0322', 'webtts':'WebTTS'}, + options={'api':'api', 'api_0322':'api_0322', 'api_0706':'api_0706', 'webtts':'WebTTS', 'gradio':'gradio旧版', 'gradio_0322':'gradio_0322'}, value=config.get("gpt_sovits", "type") ).style("width:100px;") input_gpt_sovits_gradio_ip_port = ui.input( @@ -5086,7 +5092,32 @@ def vits_get_speaker_id(): input_gpt_sovits_api_0322_fragment_interval = ui.input(label='分段间隔(秒)', value=config.get("gpt_sovits", "api_0322", "fragment_interval"), placeholder='fragment_interval').style("width:100px;") switch_gpt_sovits_api_0322_split_bucket = ui.switch('split_bucket', value=config.get("gpt_sovits", "api_0322", "split_bucket")).style(switch_internal_css) switch_gpt_sovits_api_0322_return_fragment = ui.switch('return_fragment', value=config.get("gpt_sovits", "api_0322", "return_fragment")).style(switch_internal_css) - + + with ui.card().style(card_css): + ui.label("api_0706") + with ui.row(): + input_gpt_sovits_api_0706_refer_wav_path = ui.input(label='参考音频路径', value=config.get("gpt_sovits", "api_0706", "refer_wav_path"), placeholder='参考音频路径,建议填绝对路径').style("width:300px;") + input_gpt_sovits_api_0706_prompt_text = ui.input(label='参考音频的文本', value=config.get("gpt_sovits", "api_0706", "prompt_text"), placeholder='参考音频的文本').style("width:200px;") + select_gpt_sovits_api_0706_prompt_language = ui.select( + label='参考音频的语种', + options={'中文':'中文', '日文':'日文', '英文':'英文'}, + value=config.get("gpt_sovits", "api_0706", "prompt_language") + ).style("width:150px;") + select_gpt_sovits_api_0706_text_language = ui.select( + label='需要合成的语种', + options={ + '自动识别':'自动识别', + '中文':'中文', + '日文':'日文', + '英文':'英文', + '中英混合': '中英混合', + '日英混合': '日英混合', + '多语种混合': '多语种混合', + }, + value=config.get("gpt_sovits", "api_0706", "text_language") + ).style("width:150px;") + input_gpt_sovits_api_0706_cut_punc = ui.input(label='文本切分', value=config.get("gpt_sovits", "api_0706", "cut_punc"), placeholder='文本切分符号设定, 符号范围,.;?!、,。?!;:…').style("width:200px;") + with ui.card().style(card_css): ui.label("WebTTS相关配置")