Skip to content

Commit

Permalink
vits新增gpt-sovits的兼容(vits-simple-api)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Mar 25, 2024
1 parent bc828db commit b63d205
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 8 deletions.
15 changes: 14 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,20 @@
"noisew": "0.4",
"max": "50",
"format": "wav",
"sdp_radio": "0.2"
"sdp_radio": "0.2",
"gpt_sovits": {
"id": "0",
"format": "wav",
"lang": "auto",
"segment_size": "30",
"reference_audio": "E:\\GitHub_pro\\AI-Vtuber\\out\\gpt_sovits_67.wav",
"prompt_text": "所有拍到的姐妹一定不要划走",
"prompt_lang": "auto",
"preset": "default",
"top_k": "5",
"top_p": "1",
"temperature": "1"
}
},
"bert_vits2": {
"type": "hiyori",
Expand Down
15 changes: 14 additions & 1 deletion config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,20 @@
"noisew": "0.4",
"max": "50",
"format": "wav",
"sdp_radio": "0.2"
"sdp_radio": "0.2",
"gpt_sovits": {
"id": "0",
"format": "wav",
"lang": "auto",
"segment_size": "30",
"reference_audio": "E:\\GitHub_pro\\AI-Vtuber\\out\\gpt_sovits_67.wav",
"prompt_text": "所有拍到的姐妹一定不要划走",
"prompt_lang": "auto",
"preset": "default",
"top_k": "5",
"top_p": "1",
"temperature": "1"
}
},
"bert_vits2": {
"type": "hiyori",
Expand Down
95 changes: 95 additions & 0 deletions tests/test_vits/gpt-sovits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import logging, asyncio, aiohttp, traceback, os
from aiohttp import FormData
from urllib.parse import urlencode, urljoin

class TTS:
    """Stand-alone test client for the vits-simple-api GPT-SoVITS endpoint."""

    def __init__(self):
        # Timeout value handed to aiohttp for the synthesis request.
        self.timeout = 60

    async def vits_simple_api_gpt_sovits_api(self, data):
        """Request GPT-SoVITS synthesis from vits-simple-api and save the audio.

        The text parameters and the reference audio file are sent as a
        multipart form POST to ``<api_ip_port>/voice/gpt-sovits``; the
        response body is written to ``1.wav`` in the current directory.

        Args:
            data: dict providing the keys ``api_ip_port``, ``content``,
                ``id``, ``format``, ``lang``, ``segment_size``,
                ``reference_audio`` (path of the reference audio file),
                ``prompt_text``, ``prompt_lang``, ``preset``, ``top_k``,
                ``top_p`` and ``temperature``.

        Returns:
            The path of the written audio file (``'1.wav'``) on success,
            or ``None`` when anything fails (the error is logged).
        """
        try:
            logging.debug(f"data={data}")
            # Endpoint, e.g. "http://127.0.0.1:23456/voice/gpt-sovits"
            API_URL = urljoin(data["api_ip_port"], '/voice/gpt-sovits')

            data_json = {
                "text": data["content"],
                "id": data["id"],
                "format": data["format"],
                "lang": data["lang"],
                "segment_size": data["segment_size"],
                "prompt_text": data["prompt_text"],
                "prompt_lang": data["prompt_lang"],
                "preset": data["preset"],
                "top_k": data["top_k"],
                "top_p": data["top_p"],
                "temperature": data["temperature"]
            }

            # Multipart form: every text parameter is sent as a string field.
            form_data = FormData()
            for key, value in data_json.items():
                form_data.add_field(key, str(value))

            # Open the reference audio in a context manager so the handle is
            # released on every path, including a request that fails before
            # the upload happens.
            with open(data["reference_audio"], 'rb') as reference_audio:
                # 'reference_audio' is the form field name and must match
                # the name the server reads.
                form_data.add_field('reference_audio',
                                    reference_audio,
                                    content_type='audio/mpeg')  # adjust to the real file type

                logging.info(f"data_json={data_json}")
                logging.info(f"API_URL={API_URL}")

                async with aiohttp.ClientSession() as session:
                    async with session.post(API_URL, data=form_data, timeout=self.timeout) as response:
                        # An HTTP error status raises ClientResponseError
                        # (a ClientError), so an error body is never saved
                        # as if it were audio.
                        response.raise_for_status()
                        audio_bytes = await response.read()

            voice_tmp_path = '1.wav'
            with open(voice_tmp_path, 'wb') as f:
                f.write(audio_bytes)

            return voice_tmp_path
        except aiohttp.ClientError as e:
            logging.error(traceback.format_exc())
            logging.error(f'vits_simple_api gpt_sovits请求失败,请检查您的vits_simple_api是否启动/配置是否正确,报错内容: {e}')
        except Exception as e:
            # Top-level boundary of the test client: log and report failure
            # through the None return value.
            logging.error(traceback.format_exc())
            logging.error(f'vits_simple_api gpt_sovits未知错误,请检查您的vits_simple_api是否启动/配置是否正确,报错内容: {e}')

        return None

if __name__ == '__main__':
    # Logging setup for the manual test run; DEBUG shows the request payload.
    log_options = {
        "level": logging.DEBUG,
        "format": "%(asctime)s [%(levelname)s] %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    logging.basicConfig(**log_options)

    # Sample request against a locally running vits-simple-api instance.
    demo_request = dict(
        api_ip_port="http://127.0.0.1:23456/",
        content="你好,你在说什么玩意,啊啊啊啊",
        id=0,
        format="wav",
        lang="auto",
        segment_size=30,
        reference_audio="E:\\GitHub_pro\\AI-Vtuber\\out\\gpt_sovits_67.wav",
        prompt_text="所有拍到的姐妹一定不要划走",
        prompt_lang="auto",
        preset="default",
        top_k=5,
        top_p=1,
        temperature=1,
    )

    tts_client = TTS()
    asyncio.run(tts_client.vits_simple_api_gpt_sovits_api(demo_request))



7 changes: 5 additions & 2 deletions utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,11 +660,13 @@ async def voice_change_and_put_to_queue(message, voice_tmp_path):
"noisew": message["data"]["noisew"],
"max": message["data"]["max"],
"sdp_radio": message["data"]["sdp_radio"],
"content": message["content"]
"content": message["content"],
"gpt_sovits": message["data"]["gpt_sovits"],
}

# 调用接口合成语音
voice_tmp_path = await self.my_tts.vits_api(data)

elif message["tts_type"] == "bert_vits2":
if message["data"]["type"] == "hiyori":
if message["data"]["language"] == "auto":
Expand Down Expand Up @@ -1381,7 +1383,8 @@ async def audio_synthesis_use_local_config(self, content, audio_synthesis_type="
"noisew": vits["noisew"],
"max": vits["max"],
"sdp_radio": vits["sdp_radio"],
"content": content
"content": content,
"gpt_sovits": vits["gpt_sovits"],
}

# 调用接口合成语音
Expand Down
66 changes: 65 additions & 1 deletion utils/audio_handle/my_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,69 @@ async def vits_api(self, data):
data_json["lang"] = "auto"
else:
data_json["lang"] = "auto"

elif data["type"] == "gpt_sovits":
# 请求vits_simple_api的api gpt_sovits
async def vits_simple_api_gpt_sovits_api(data):
    """Request GPT-SoVITS synthesis from vits-simple-api and save the audio.

    The text parameters and the reference audio file are sent as a
    multipart form POST to ``<api_ip_port>/voice/gpt-sovits``; the response
    body is written to a new file under ``self.audio_out_path``.

    Args:
        data: dict providing ``api_ip_port``, ``content`` and a nested
            ``gpt_sovits`` dict with the keys ``id``, ``format``, ``lang``,
            ``segment_size``, ``reference_audio`` (path of the reference
            audio file), ``prompt_text``, ``prompt_lang``, ``preset``,
            ``top_k``, ``top_p`` and ``temperature``.

    Returns:
        The path of the written audio file on success, or ``None`` when
        anything fails (the error is logged).
    """
    try:
        from aiohttp import FormData

        logging.debug(f"data={data}")
        API_URL = urljoin(data["api_ip_port"], '/voice/gpt-sovits')

        gpt_sovits = data["gpt_sovits"]
        data_json = {
            "text": data["content"],
            "id": gpt_sovits["id"],
            "format": gpt_sovits["format"],
            "lang": gpt_sovits["lang"],
            "segment_size": gpt_sovits["segment_size"],
            "prompt_text": gpt_sovits["prompt_text"],
            "prompt_lang": gpt_sovits["prompt_lang"],
            "preset": gpt_sovits["preset"],
            "top_k": gpt_sovits["top_k"],
            "top_p": gpt_sovits["top_p"],
            "temperature": gpt_sovits["temperature"]
        }

        # Multipart form: every text parameter is sent as a string field.
        form_data = FormData()
        for key, value in data_json.items():
            form_data.add_field(key, str(value))

        # Open the reference audio in a context manager so the handle is
        # released on every path, including a request that fails before the
        # upload happens.
        with open(gpt_sovits["reference_audio"], 'rb') as reference_audio:
            # 'reference_audio' is the form field name and must match the
            # name the server reads.
            form_data.add_field('reference_audio',
                                reference_audio,
                                content_type='audio/mpeg')  # adjust to the real file type

            logging.debug(f"data_json={data_json}")
            logging.debug(f"API_URL={API_URL}")

            async with aiohttp.ClientSession() as session:
                async with session.post(API_URL, data=form_data, timeout=self.timeout) as response:
                    # An HTTP error status raises ClientResponseError (a
                    # ClientError), so an error body is never saved as if
                    # it were audio.
                    response.raise_for_status()
                    audio_bytes = await response.read()

        # NOTE(review): the file name always ends in '.wav' even when the
        # configured "format" is something else — confirm this is intended.
        file_name = 'vits_simple_api_' + self.common.get_bj_time(4) + '.wav'
        voice_tmp_path = self.common.get_new_audio_path(self.audio_out_path, file_name)

        with open(voice_tmp_path, 'wb') as f:
            f.write(audio_bytes)

        return voice_tmp_path
    except aiohttp.ClientError as e:
        logging.error(traceback.format_exc())
        logging.error(f'vits_simple_api gpt_sovits请求失败,请检查您的vits_simple_api是否启动/配置是否正确,报错内容: {e}')
    except Exception as e:
        # Boundary of the synthesis call: log and report failure through
        # the None return value.
        logging.error(traceback.format_exc())
        logging.error(f'vits_simple_api gpt_sovits未知错误,请检查您的vits_simple_api是否启动/配置是否正确,报错内容: {e}')

    return None

voice_tmp_path = await vits_simple_api_gpt_sovits_api(data)
return voice_tmp_path

# logging.info(f"data_json={data_json}")
# logging.info(f"data={data}")

Expand Down Expand Up @@ -824,3 +886,5 @@ def replace_empty_strings_with_none(input_dict):

return None



43 changes: 40 additions & 3 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -1566,6 +1566,18 @@ def common_textarea_handle(content):
config_data["vits"]["format"] = input_vits_format.value
config_data["vits"]["sdp_radio"] = input_vits_sdp_radio.value

config_data["vits"]["gpt_sovits"]["id"] = input_vits_gpt_sovits_id.value
config_data["vits"]["gpt_sovits"]["lang"] = select_vits_gpt_sovits_lang.value
config_data["vits"]["gpt_sovits"]["format"] = input_vits_gpt_sovits_format.value
config_data["vits"]["gpt_sovits"]["segment_size"] = input_vits_gpt_sovits_segment_size.value
config_data["vits"]["gpt_sovits"]["reference_audio"] = input_vits_gpt_sovits_reference_audio.value
config_data["vits"]["gpt_sovits"]["prompt_text"] = input_vits_gpt_sovits_prompt_text.value
config_data["vits"]["gpt_sovits"]["prompt_lang"] = select_vits_gpt_sovits_prompt_lang.value
config_data["vits"]["gpt_sovits"]["top_k"] = input_vits_gpt_sovits_top_k.value
config_data["vits"]["gpt_sovits"]["top_p"] = input_vits_gpt_sovits_top_p.value
config_data["vits"]["gpt_sovits"]["temperature"] = input_vits_gpt_sovits_temperature.value
config_data["vits"]["gpt_sovits"]["preset"] = input_vits_gpt_sovits_preset.value

if config.get("webui", "show_card", "tts", "bert_vits2"):
config_data["bert_vits2"]["type"] = select_bert_vits2_type.value
config_data["bert_vits2"]["api_ip_port"] = input_bert_vits2_api_ip_port.value
Expand Down Expand Up @@ -3356,11 +3368,11 @@ def clear_tts_common_audio_card(file_path):

if config.get("webui", "show_card", "tts", "vits"):
with ui.card().style(card_css):
ui.label("VITS")
ui.label("VITS-Simple-API")
with ui.row():
select_vits_type = ui.select(
label='类型',
options={'vits': 'vits', 'bert_vits2': 'bert_vits2'},
options={'vits': 'vits', 'bert_vits2': 'bert_vits2', 'gpt_sovits': 'gpt_sovits'},
value=config.get("vits", "type")
).style("width:200px;")
input_vits_config_path = ui.input(label='配置文件路径', placeholder='模型配置文件存储路径', value=config.get("vits", "config_path")).style("width:200px;")
Expand All @@ -3385,7 +3397,32 @@ def clear_tts_common_audio_card(file_path):
input_vits_format = ui.input(label='音频格式', placeholder='支持wav,ogg,silk,mp3,flac', value=config.get("vits", "format")).style("width:200px;")

input_vits_sdp_radio = ui.input(label='SDP/DP混合比', placeholder='SDP/DP混合比:SDP在合成时的占比,理论上此比率越高,合成的语音语调方差越大。', value=config.get("vits", "sdp_radio")).style("width:200px;")


# GPT-SoVITS settings panel: each widget is seeded from config["vits"]["gpt_sovits"].
with ui.expansion('GPT-SOVITS', icon="settings", value=True).classes('w-full'):
    with ui.row():
        input_vits_gpt_sovits_id = ui.input(label='说话人ID', value=config.get("vits", "gpt_sovits", "id"), placeholder='API启动时会给配置文件重新划分id,一般为拼音顺序排列,从0开始').style("width:100px;")

        # Display labels fixed: 'jp' is Japanese (日文) and 'en' is English
        # (英文); they were swapped before.
        # NOTE(review): confirm the server expects 'jp' rather than 'ja'
        # as the Japanese language code.
        select_vits_gpt_sovits_lang = ui.select(
            label='语言',
            options={'auto': '自动', 'zh': '中文', 'jp': '日文', 'en': '英文'},
            value=config.get("vits", "gpt_sovits", "lang")
        ).style("width:100px;")
        input_vits_gpt_sovits_format = ui.input(label='音频格式', value=config.get("vits", "gpt_sovits", "format"), placeholder='支持wav,ogg,silk,mp3,flac').style("width:100px;")
        input_vits_gpt_sovits_segment_size = ui.input(label='segment_size', value=config.get("vits", "gpt_sovits", "segment_size"), placeholder='segment_size').style("width:100px;")
        input_vits_gpt_sovits_reference_audio = ui.input(label='参考音频路径', value=config.get("vits", "gpt_sovits", "reference_audio"), placeholder='参考音频路径').style("width:200px;")
        input_vits_gpt_sovits_prompt_text = ui.input(label='参考音频文本内容', value=config.get("vits", "gpt_sovits", "prompt_text"), placeholder='参考音频文本内容').style("width:200px;")
        # Same label fix as the synthesis-language select above.
        select_vits_gpt_sovits_prompt_lang = ui.select(
            label='参考音频语言',
            options={'auto': '自动', 'zh': '中文', 'jp': '日文', 'en': '英文'},
            value=config.get("vits", "gpt_sovits", "prompt_lang")
        ).style("width:150px;")
    with ui.row():
        input_vits_gpt_sovits_top_k = ui.input(label='top_k', value=config.get("vits", "gpt_sovits", "top_k"), placeholder='top_k').style("width:100px;")
        input_vits_gpt_sovits_top_p = ui.input(label='top_p', value=config.get("vits", "gpt_sovits", "top_p"), placeholder='top_p').style("width:100px;")
        input_vits_gpt_sovits_temperature = ui.input(label='temperature', value=config.get("vits", "gpt_sovits", "temperature"), placeholder='temperature').style("width:100px;")
        input_vits_gpt_sovits_preset = ui.input(label='preset', value=config.get("vits", "gpt_sovits", "preset"), placeholder='preset').style("width:100px;")


if config.get("webui", "show_card", "tts", "bert_vits2"):
with ui.card().style(card_css):
ui.label("bert_vits2")
Expand Down

0 comments on commit b63d205

Please sign in to comment.