Skip to content

Commit

Permalink
sadtalker新增参数gradio_api_type用于适配不同的接口传参
Browse files Browse the repository at this point in the history
  • Loading branch information
Ikaros-521 committed Apr 9, 2024
1 parent bbeba05 commit f01032e
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 21 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,20 @@ python:3.10.10
| msg | string | 响应消息,描述请求的处理结果 |

# 更新日志
- v0.1.3
  - sadtalker新增参数`gradio_api_type`,用于适配不同的接口传参方式(`api_name` / `fn_index`)
  - 补充测试图和视频

- v0.1.0
  - 初版发布
- v0.1.2
  - 对接GeneFacePlusPlus(未测试)

- v0.1.1
  - 对接sadtalker
  - API新增参数`type`
  - 优化视频播放逻辑,尝试解决视频过渡时的无效等待问题

- v0.1.2
  - 对接GeneFacePlusPlus(未测试)

- v0.1.0
  - 初版发布


36 changes: 25 additions & 11 deletions api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,31 @@ def get_video(type: str, audio_path: str):
return result[0]["video"]
elif type == "sadtalker":
client = Client(config.get("sadtalker", "api_ip_port"))
result = client.predict(
config.get("sadtalker", "img_file"), # filepath in 'Source image' Image component
audio_path, # filepath in 'Input audio' Audio component
config.get("sadtalker", "preprocess"), # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component
config.get("sadtalker", "still_mode"), # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
config.get("sadtalker", "GFPGAN"), # bool in 'GFPGAN as Face enhancer' Checkbox component
config.get("sadtalker", "batch_size"), # float (numeric value between 0 and 10) in 'batch size in generation' Slider component
config.get("sadtalker", "face_model_resolution"), # Literal[256, 512] in 'face model resolution' Radio component
config.get("sadtalker", "pose_style"), # float (numeric value between 0 and 46) in 'Pose style' Slider component
api_name="/test"
)

if config.get("sadtalker", "gradio_api_type") == "api_name":
result = client.predict(
config.get("sadtalker", "img_file"), # filepath in 'Source image' Image component
audio_path, # filepath in 'Input audio' Audio component
config.get("sadtalker", "preprocess"), # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component
config.get("sadtalker", "still_mode"), # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
config.get("sadtalker", "GFPGAN"), # bool in 'GFPGAN as Face enhancer' Checkbox component
config.get("sadtalker", "batch_size"), # float (numeric value between 0 and 10) in 'batch size in generation' Slider component
config.get("sadtalker", "face_model_resolution"), # Literal[256, 512] in 'face model resolution' Radio component
config.get("sadtalker", "pose_style"), # float (numeric value between 0 and 46) in 'Pose style' Slider component
api_name="/test"
)
else:
result = client.predict(
config.get("sadtalker", "img_file"), # filepath in 'Source image' Image component
audio_path, # filepath in 'Input audio' Audio component
config.get("sadtalker", "preprocess"), # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component
config.get("sadtalker", "still_mode"), # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
config.get("sadtalker", "GFPGAN"), # bool in 'GFPGAN as Face enhancer' Checkbox component
config.get("sadtalker", "batch_size"), # float (numeric value between 0 and 10) in 'batch size in generation' Slider component
config.get("sadtalker", "face_model_resolution"), # Literal[256, 512] in 'face model resolution' Radio component
config.get("sadtalker", "pose_style"), # float (numeric value between 0 and 46) in 'Pose style' Slider component
fn_index=1
)

logging.info(f'合成成功,生成在:{result["video"]}')

Expand Down
7 changes: 4 additions & 3 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
"batch_process": "False"
},
"sadtalker": {
"gradio_api_type": "api_name",
"api_ip_port": "http://127.0.0.1:7860",
"img_file": "E:\\GitHub_pro\\digital_human_video_player\\static\\imgs\\1.png",
"img_file": "E:\\GitHub_pro\\digital_human_video_player\\static\\imgs\\2.png",
"preprocess": "crop",
"still_mode": false,
"GFPGAN": false,
"batch_size": 2,
"GFPGAN": true,
"batch_size": 4,
"face_model_resolution": 256,
"pose_style": 0
},
Expand Down
Binary file added static/imgs/2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

// 视频播放队列
let videoQueue = [];
let defaultVideo = './videos/1.mp4'; // 默认视频路径
let defaultVideo = './videos/2.mp4'; // 默认视频路径
let currentVideo = video1; // 当前播放的视频元素
let nextVideo = video2; // 下一个将要播放的视频元素
let isWaitingForNextVideo = false; // 是否正在等待下一个视频加载
Expand Down
Binary file modified static/videos/2.mp4
Binary file not shown.
17 changes: 15 additions & 2 deletions tests/sadtalker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")
#client = Client("http://127.0.0.1:7860/")
# result = client.predict(
# "C:\\Users\\Administrator\\Pictures\\test\\1.png", # filepath in 'Source image' Image component
# "C:\\Users\\Administrator\\Pictures\\test\\2.mp3", # filepath in 'Input audio' Audio component
# "crop", # Literal[crop, resize, full, extcrop, extfull] in 'preprocess' Radio component
# True, # bool in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
# True, # bool in 'GFPGAN as Face enhancer' Checkbox component
# 2, # float (numeric value between 0 and 10) in 'batch size in generation' Slider component
# 256, # Literal[256, 512] in 'face model resolution' Radio component
# 0, # float (numeric value between 0 and 46) in 'Pose style' Slider component
# api_name="/test"
# )

client = Client("https://u373179-884c-4835511a.westc.gpuhub.com:8443/")
result = client.predict(
"C:\\Users\\Administrator\\Pictures\\test\\1.png", # filepath in 'Source image' Image component
"C:\\Users\\Administrator\\Pictures\\test\\2.mp3", # filepath in 'Input audio' Audio component
Expand All @@ -10,6 +23,6 @@
2, # float (numeric value between 0 and 10) in 'batch size in generation' Slider component
256, # Literal[256, 512] in 'face model resolution' Radio component
0, # float (numeric value between 0 and 46) in 'Pose style' Slider component
api_name="/test"
fn_index=1
)
print(result)

0 comments on commit f01032e

Please sign in to comment.