diff --git a/README.md b/README.md index 0bde8bf..1bc9118 100644 --- a/README.md +++ b/README.md @@ -49,14 +49,20 @@ python:3.10.10 | msg | string | 响应消息,描述请求的处理结果 | # 更新日志 +- v0.1.3 + - sadtalker新增参数`gradio_api_type`用于适配不同的接口传参(api_name/fn_index) + - 补充测试图和视频 -- v0.1.0 - - 初版发布 +- v0.1.2 + - 对接GeneFacePlusPlus(未测试) - v0.1.1 - 对接sadtalker - API新增参数type - 优化视频播放逻辑,尝试解决视频过渡时的无效等待问题 -- v0.1.2 - - 对接GeneFacePlusPlus(未测试) + +- v0.1.0 + - 初版发布 + + diff --git a/api_server.py b/api_server.py index fe97c9b..28cfe05 100644 --- a/api_server.py +++ b/api_server.py @@ -50,17 +50,31 @@ def get_video(type: str, audio_path: str): return result[0]["video"] elif type == "sadtalker": client = Client(config.get("sadtalker", "api_ip_port")) - result = client.predict( - config.get("sadtalker", "img_file"), # filepath in 'Source image' Image component - audio_path, # filepath in 'Input audio' Audio component - config.get("sadtalker", "preprocess"), # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component - config.get("sadtalker", "still_mode"), # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component - config.get("sadtalker", "GFPGAN"), # bool in 'GFPGAN as Face enhancer' Checkbox component - config.get("sadtalker", "batch_size"), # float (numeric value between 0 and 10) in 'batch size in generation' Slider component - config.get("sadtalker", "face_model_resolution"), # Literal[256, 512] in 'face model resolution' Radio component - config.get("sadtalker", "pose_style"), # float (numeric value between 0 and 46) in 'Pose style' Slider component - api_name="/test" - ) + + if config.get("sadtalker", "gradio_api_type") == "api_name": + result = client.predict( + config.get("sadtalker", "img_file"), # filepath in 'Source image' Image component + audio_path, # filepath in 'Input audio' Audio component + config.get("sadtalker", "preprocess"), # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component + config.get("sadtalker", "still_mode"), # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component + config.get("sadtalker", "GFPGAN"), # bool in 'GFPGAN as Face enhancer' Checkbox component + config.get("sadtalker", "batch_size"), # float (numeric value between 0 and 10) in 'batch size in generation' Slider component + config.get("sadtalker", "face_model_resolution"), # Literal[256, 512] in 'face model resolution' Radio component + config.get("sadtalker", "pose_style"), # float (numeric value between 0 and 46) in 'Pose style' Slider component + api_name="/test" + ) + else: + result = client.predict( + config.get("sadtalker", "img_file"), # filepath in 'Source image' Image component + audio_path, # filepath in 'Input audio' Audio component + config.get("sadtalker", "preprocess"), # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component + config.get("sadtalker", "still_mode"), # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component + config.get("sadtalker", "GFPGAN"), # bool in 'GFPGAN as Face enhancer' Checkbox component + config.get("sadtalker", "batch_size"), # float (numeric value between 0 and 10) in 'batch size in generation' Slider component + config.get("sadtalker", "face_model_resolution"), # Literal[256, 512] in 'face model resolution' Radio component + config.get("sadtalker", "pose_style"), # float (numeric value between 0 and 46) in 'Pose style' Slider component + fn_index=1 + ) logging.info(f'合成成功,生成在:{result["video"]}') diff --git a/config.json b/config.json index 7b4a6b8..dafbc1d 100644 --- a/config.json +++ b/config.json @@ -20,12 +20,13 @@ "batch_process": "False" }, "sadtalker": { + "gradio_api_type": "api_name", "api_ip_port": "http://127.0.0.1:7860", - "img_file": "E:\\GitHub_pro\\digital_human_video_player\\static\\imgs\\1.png", + "img_file": "E:\\GitHub_pro\\digital_human_video_player\\static\\imgs\\2.png", "preprocess": "crop", "still_mode": false, - "GFPGAN": false, - "batch_size": 2, + "GFPGAN": true, + "batch_size": 4, "face_model_resolution": 256, "pose_style": 0 }, diff --git a/static/imgs/2.png b/static/imgs/2.png new file mode 100644 index 0000000..464f591 Binary files /dev/null and b/static/imgs/2.png differ diff --git a/static/index.html b/static/index.html index 6daa473..a9265e4 100644 --- a/static/index.html +++ b/static/index.html @@ -25,7 +25,7 @@ // 视频播放队列 let videoQueue = []; - let defaultVideo = './videos/1.mp4'; // 默认视频路径 + let defaultVideo = './videos/2.mp4'; // 默认视频路径 let currentVideo = video1; // 当前播放的视频元素 let nextVideo = video2; // 下一个将要播放的视频元素 let isWaitingForNextVideo = false; // 是否正在等待下一个视频加载 diff --git a/static/videos/2.mp4 b/static/videos/2.mp4 index 08e3ae9..d66bdd6 100644 Binary files a/static/videos/2.mp4 and b/static/videos/2.mp4 differ diff --git a/tests/sadtalker.py b/tests/sadtalker.py index 914986c..a7ce1ef 100644 --- a/tests/sadtalker.py +++ b/tests/sadtalker.py @@ -1,6 +1,19 @@ from gradio_client import Client -client = Client("http://127.0.0.1:7860/") +#client = Client("http://127.0.0.1:7860/") +# result = client.predict( +# "C:\\Users\\Administrator\\Pictures\\test\\1.png", # filepath in 'Source image' Image component +# "C:\\Users\\Administrator\\Pictures\\test\\2.mp3", # filepath in 'Input audio' Audio component +# "crop", # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component +# True, # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component +# True, # bool in 'GFPGAN as Face enhancer' Checkbox component +# 2, # float (numeric value between 0 and 10) in 'batch size in generation' Slider component +# 256, # Literal[256, 512] in 'face model resolution' Radio component +# 0, # float (numeric value between 0 and 46) in 'Pose style' Slider component +# api_name="/test" +# ) + +client = Client("https://u373179-884c-4835511a.westc.gpuhub.com:8443/") result = client.predict( "C:\\Users\\Administrator\\Pictures\\test\\1.png", # filepath in 'Source image' Image component "C:\\Users\\Administrator\\Pictures\\test\\2.mp3", # filepath in 'Input audio' Audio component @@ -10,6 +23,6 @@ 2, # float (numeric value between 0 and 10) in 'batch size in generation' Slider component 256, # Literal[256, 512] in 'face model resolution' Radio component 0, # float (numeric value between 0 and 46) in 'Pose style' Slider component - api_name="/test" + fn_index=1 ) print(result) \ No newline at end of file