Skip to content

Commit

Permalink
修复:流式LLM时 如果结尾没有中文标点符号会丢句子的bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Oct 2, 2024
2 parents ea21bdc + c02512d commit 89011a2
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 185 deletions.
17 changes: 14 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
# ✨ Luna AI ✨
<div align="center">
<a href="https://ikaros-521.github.io/Luna-Docs/site/">
<img src="https://raw.githubusercontent.com/Ikaros-521/AI-Vtuber/refs/heads/main/ui/icon.png" width="240" height="240" alt="点我跳转文档">
</a>
</div>

<div align="center">

# ✨ Luna AI ✨



[![][FOSSA-Status]][FOSSA-Status]
[![][python]][python]
[![][github-release-shield]][github-release-link]
[![][github-stars-shield]][github-stars-link]
[![][github-forks-shield]][github-forks-link]
[![][github-issues-shield]][github-issues-link]
[![][github-contributors-shield]][github-contributors-link]
[![][github-issues-shield]][github-issues-link]
[![][github-license-shield]][github-license-link]
[![][FOSSA-Status]][FOSSA-Status]


</div>

`Luna AI` 是一款结合了最先进技术的虚拟AI主播。它的核心是一系列高效的人工智能模型和平台,包括 `ChatterBot、GPT、Claude、langchain、chatglm、text-generation-webui、讯飞星火、智谱AI、谷歌Bard、通义星尘、阿里云百炼(通义千问、百川、月之暗面、零一万物、MiniMax)、千帆大模型(文心一言)、Gemini、Kimi Chat、QAnything、koboldcpp、FastGPT、Ollama、One-API、AnythingLLM、LLM_TPU、Dify、火山引擎(豆包)`。这些模型既可以在本地运行,也可以通过云端服务提供支持。当然,为了让对话照进现实,还结合了多模态模型,包括 `Gemini、glm-4v` 的图像识别能力,获取电脑画面进行分析讲解。

`Luna AI` 的外观由 `Live2D、Vtube Studio、xuniren、UE5 结合 Audio2Face、EasyAIVtuber、数字人视频播放器(Easy-Wav2Lip、Sadtalker、GeneFace++、MuseTalk、AniTalker、本地视频)、metahuman-stream(ernerf、musetalk、wav2lip)、DH_live、live2d-TTS-LLM-GPT-SoVITS-Vtuber` 技术打造,为用户提供了一个生动、互动的虚拟形象。这使得 `Luna AI` 能够在各大直播平台,如 `Bilibili、抖音、快手、微信视频号、拼多多、1688、斗鱼、YouTube、Twitch 和 TikTok`,进行实时互动直播。当然,它也可以在本地环境中与您进行个性化对话。
Expand Down
31 changes: 15 additions & 16 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
"enable": false,
"copywriting": "{username}说:{comment}"
},
"reply_template": {
"enable": false,
"username_max_len": 10,
"copywriting": [
"回复{username}{data}",
"{username}{data}"
]
},
"comment_log_type": "回答",
"visual_body": "其他",
"xuniren": {
Expand Down Expand Up @@ -73,21 +82,6 @@
"proxy_server": "127.0.0.1",
"proxy_port": "10809"
},
"read_username": {
"enable": false,
"username_max_len": 10,
"voice_change": false,
"reply_before": [
"回复{username}",
"{username}",
"回{username}"
],
"reply_after": [
"以上内容回复{username}",
"回的{username}",
"以上回复{username}"
]
},
"read_comment": {
"enable": true,
"read_username_enable": true,
Expand Down Expand Up @@ -263,6 +257,11 @@
},
"search_online": {
"enable": false,
"keyword_enable": true,
"before_keyword": [
"联网",
"在线"
],
"engine": "baidu",
"engine_id": 1,
"count": 1,
Expand Down Expand Up @@ -570,7 +569,7 @@
},
"local_qa": {
"periodic_trigger": {
"enable": true,
"enable": false,
"periodic_time_min": 10,
"periodic_time_max": 30,
"trigger_num_min": 0,
Expand Down
31 changes: 15 additions & 16 deletions config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
"enable": false,
"copywriting": "{username}说:{comment}"
},
"reply_template": {
"enable": false,
"username_max_len": 10,
"copywriting": [
"回复{username}{data}",
"{username}{data}"
]
},
"comment_log_type": "回答",
"visual_body": "其他",
"xuniren": {
Expand Down Expand Up @@ -73,21 +82,6 @@
"proxy_server": "127.0.0.1",
"proxy_port": "10809"
},
"read_username": {
"enable": false,
"username_max_len": 10,
"voice_change": false,
"reply_before": [
"回复{username}",
"{username}",
"回{username}"
],
"reply_after": [
"以上内容回复{username}",
"回的{username}",
"以上回复{username}"
]
},
"read_comment": {
"enable": true,
"read_username_enable": true,
Expand Down Expand Up @@ -263,6 +257,11 @@
},
"search_online": {
"enable": false,
"keyword_enable": true,
"before_keyword": [
"联网",
"在线"
],
"engine": "baidu",
"engine_id": 1,
"count": 1,
Expand Down Expand Up @@ -570,7 +569,7 @@
},
"local_qa": {
"periodic_trigger": {
"enable": true,
"enable": false,
"periodic_time_min": 10,
"periodic_time_max": 30,
"trigger_num_min": 0,
Expand Down
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ async def send(msg: SendMessage):

try:
tmp_json = msg.dict()
logger.info(f"API收到数据{tmp_json}")
logger.info(f"内部HTTP API send接口收到数据{tmp_json}")
data_json = tmp_json["data"]
if "type" not in data_json:
data_json["type"] = tmp_json["type"]
Expand Down Expand Up @@ -228,7 +228,7 @@ async def callback(msg: CallbackMessage):

try:
data_json = msg.dict()
logger.info(f"API收到数据{data_json}")
logger.info(f"内部HTTP API callback接口收到数据{data_json}")

# 音频播放完成
if data_json["type"] in ["audio_playback_completed"]:
Expand Down
34 changes: 3 additions & 31 deletions utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,22 +670,6 @@ def audio_synthesis(self, message):
if "insert_index" in data_json:
data_json["insert_index"] = message["insert_index"]

# 回复时是否念用户名字
if self.config.get("read_username", "enable"):
# 由于线程是独立的,所以回复音频的合成会慢于本地音频直接播放,所以以倒述的形式回复
tmp_message = deepcopy(message)
tmp_message['type'] = "reply"
tmp_message['content'] = random.choice(self.config.get("read_username", "reply_after"))
if "{username}" in tmp_message['content']:
tmp_message['content'] = tmp_message['content'].format(username=message['username'][:self.config.get("read_username", "username_max_len")])

logger.info(f"tmp_message={tmp_message}")

self.data_priority_insert("等待合成消息", tmp_message)
# else:
# logger.info(f"message={message}")
# self.data_priority_insert("等待合成消息", message)

# 是否开启了音频播放
if self.config.get("play_audio", "enable"):
self.data_priority_insert("等待合成消息", data_json)
Expand All @@ -708,18 +692,6 @@ def audio_synthesis(self, message):
self.data_priority_insert("等待合成消息", data_json)
return

# 只有信息类型是 弹幕,才会进行念用户名
elif message['type'] == "comment":
# 回复时是否念用户名字
if self.config.get("read_username", "enable"):
tmp_message = deepcopy(message)
tmp_message['type'] = "reply"
tmp_message['content'] = random.choice(self.config.get("read_username", "reply_before"))
if "{username}" in tmp_message['content']:
# 将用户名中特殊字符替换为空
message['username'] = self.common.replace_special_characters(message['username'], "!!@#¥$%^&*_-+/——=()()【】}|{:;<>~`\\")
tmp_message['content'] = tmp_message['content'].format(username=message['username'][:self.config.get("read_username", "username_max_len")])
self.data_priority_insert("等待合成消息", tmp_message)
# 闲时任务
elif message['type'] == "idle_time_task":
if message['content_type'] in ["comment", "reread"]:
Expand Down Expand Up @@ -1238,13 +1210,13 @@ async def voice_change_and_put_to_queue(message, voice_tmp_path):
data_json["insert_index"] = message["insert_index"]

# 区分消息类型是否是 回复xxx 并且 关闭了变声
if message["type"] == "reply" and False == self.config.get("read_username", "voice_change"):
if message["type"] == "reply":
# 是否开启了音频播放,如果没开,则不会传文件路径给播放队列
if self.config.get("play_audio", "enable"):
self.data_priority_insert("待播放音频列表", data_json)
return True
# 区分消息类型是否是 念弹幕 并且 关闭了变声
elif message["type"] == "read_comment" and False == self.config.get("read_comment", "voice_change"):
elif message["type"] == "read_comment" and not self.config.get("read_comment", "voice_change"):
# 是否开启了音频播放,如果没开,则不会传文件路径给播放队列
if self.config.get("play_audio", "enable"):
self.data_priority_insert("待播放音频列表", data_json)
Expand Down Expand Up @@ -1274,7 +1246,7 @@ async def voice_change_and_put_to_queue(message, voice_tmp_path):

return False

logger.info(f"{message['tts_type']}合成成功,合成内容:【{message['content']}】,输出到={voice_tmp_path}")
logger.info(f"[{message['tts_type']}]合成成功,合成内容:【{message['content']}】,音频存储在 {voice_tmp_path}")

await voice_change_and_put_to_queue(message, voice_tmp_path)

Expand Down
44 changes: 43 additions & 1 deletion utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,30 @@ def is_valid_port(self, port):
except ValueError:
return False

# 判断传入的字符串是否是文件夹路径或文件路径,且此文件夹路径或文件路径是否存在,返回bool
def is_dir_or_file(self, path: str, type: str = "all"):
    """Check whether ``path`` names an existing directory and/or file.

    Args:
        path (str): Directory path or file path to check.
        type (str, optional): Kind of path to accept. ``"dir"`` accepts only
            an existing directory, ``"file"`` accepts only an existing file,
            and any other value accepts either. Defaults to "all".

    Returns:
        bool: True when the path exists and matches the requested kind,
        otherwise False. A ``None`` or empty ``path`` yields False.
    """
    # os.path.isdir / os.path.isfile raise TypeError when handed None;
    # a missing path is reported as "does not exist" instead of crashing.
    if not path:
        return False

    if type == "dir":
        return os.path.isdir(path)
    if type == "file":
        return os.path.isfile(path)

    # Any other value of `type` means "either kind is acceptable".
    return os.path.isdir(path) or os.path.isfile(path)

# 识别操作系统
def detect_os(self):
"""
Expand Down Expand Up @@ -764,6 +788,23 @@ def get_random_str_in_list_and_format(self, ori_content: str = None, ori_list: l

return {"ret": 0, "content": content}

def get_list_random_or_default(self, strings: list, default_value):
    """Pick a random element from a list, falling back to a default value.

    Args:
        strings (list): Candidate values to choose from.
        default_value: Value returned when ``strings`` is empty (or falsy).

    Returns:
        A randomly chosen element of ``strings``, or ``default_value`` when
        there is nothing to choose from.
    """
    # The emptiness check comes first because random.choice cannot pick
    # from an empty sequence.
    return random.choice(strings) if strings else default_value

"""
.@@@ @@@ @@^ =@@@@@@@@ /@@ /@@ =@@@@@*,@@\]]]] ,@@@@@@@@@@@@* .@@@ @@/.\]`@@@ =@@\]]]]]]] =@@..@@@@@@@@@ =@@\ /@@^
Expand Down Expand Up @@ -946,6 +987,7 @@ def get_live2d_model_name(self, path):
else:
return None




"""
Expand Down Expand Up @@ -995,7 +1037,7 @@ def write_content_to_file(self, file_path, content, write_log=True):
with open(file_path, 'w', encoding='utf-8') as file:
file.write(content)

if write_log == True:
if write_log:
logger.info(f"写入文件:{file_path},内容:【{content}】")

return True
Expand Down
Loading

0 comments on commit 89011a2

Please sign in to comment.