sadtalker 新增参数 `gradio_api_type`，用于适配不同的 Gradio 接口传参方式（api_name / fn_index）

Ikaros-521 · Apr 9, 2024 · f01032e
1 parent bbeba05
commit f01032e
Show file tree

Hide file tree

Showing 7 changed files with 55 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -49,14 +49,20 @@ python：3.10.10
 | msg    | string  | 响应消息，描述请求的处理结果 |
 
 # 更新日志
+- v0.1.3
+    - sadtalker新增参数`gradio_api_type`用于适配不同的接口传参（api_name/fn_index）
+    - 补充测试图和视频
 
-- v0.1.0
-    - 初版发布
+- v0.1.2
+    - 对接GeneFacePlusPlus(未测试)
 
 - v0.1.1
     - 对接sadtalker
     - API新增参数type
     - 优化视频播放逻辑，尝试解决视频过渡时的无效等待问题
 
-- v0.1.2
-    - 对接GeneFacePlusPlus(未测试)
+
+- v0.1.0
+    - 初版发布
+
+
diff --git a/api_server.py b/api_server.py
@@ -50,17 +50,31 @@ def get_video(type: str, audio_path: str):
             return result[0]["video"]
         elif type == "sadtalker":
             client = Client(config.get("sadtalker", "api_ip_port"))
-            result = client.predict(
-                config.get("sadtalker", "img_file"),	# filepath  in 'Source image' Image component
-                audio_path,	# filepath  in 'Input audio' Audio component
-                config.get("sadtalker", "preprocess"),	# Literal[crop, resize, full, extcrop, extfull]  in 'preprocess' Radio component
-                config.get("sadtalker", "still_mode"),	# bool  in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
-                config.get("sadtalker", "GFPGAN"),	# bool  in 'GFPGAN as Face enhancer' Checkbox component
-                config.get("sadtalker", "batch_size"),	# float (numeric value between 0 and 10) in 'batch size in generation' Slider component
-                config.get("sadtalker", "face_model_resolution"),	# Literal[256, 512]  in 'face model resolution' Radio component
-                config.get("sadtalker", "pose_style"),	# float (numeric value between 0 and 46) in 'Pose style' Slider component
-                api_name="/test"
-            )
+
+            if config.get("sadtalker", "gradio_api_type") == "api_name":
+                result = client.predict(
+                    config.get("sadtalker", "img_file"),	# filepath  in 'Source image' Image component
+                    audio_path,	# filepath  in 'Input audio' Audio component
+                    config.get("sadtalker", "preprocess"),	# Literal[crop, resize, full, extcrop, extfull]  in 'preprocess' Radio component
+                    config.get("sadtalker", "still_mode"),	# bool  in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
+                    config.get("sadtalker", "GFPGAN"),	# bool  in 'GFPGAN as Face enhancer' Checkbox component
+                    config.get("sadtalker", "batch_size"),	# float (numeric value between 0 and 10) in 'batch size in generation' Slider component
+                    config.get("sadtalker", "face_model_resolution"),	# Literal[256, 512]  in 'face model resolution' Radio component
+                    config.get("sadtalker", "pose_style"),	# float (numeric value between 0 and 46) in 'Pose style' Slider component
+                    api_name="/test"
+                )
+            else:
+                result = client.predict(
+                    config.get("sadtalker", "img_file"),	# filepath  in 'Source image' Image component
+                    audio_path,	# filepath  in 'Input audio' Audio component
+                    config.get("sadtalker", "preprocess"),	# Literal[crop, resize, full, extcrop, extfull]  in 'preprocess' Radio component
+                    config.get("sadtalker", "still_mode"),	# bool  in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
+                    config.get("sadtalker", "GFPGAN"),	# bool  in 'GFPGAN as Face enhancer' Checkbox component
+                    config.get("sadtalker", "batch_size"),	# float (numeric value between 0 and 10) in 'batch size in generation' Slider component
+                    config.get("sadtalker", "face_model_resolution"),	# Literal[256, 512]  in 'face model resolution' Radio component
+                    config.get("sadtalker", "pose_style"),	# float (numeric value between 0 and 46) in 'Pose style' Slider component
+                    fn_index=1
+                )
 
             logging.info(f'合成成功，生成在：{result["video"]}')
 

diff --git a/config.json b/config.json
@@ -20,12 +20,13 @@
         "batch_process": "False"
     },
     "sadtalker": {
+        "gradio_api_type": "api_name",
         "api_ip_port": "http://127.0.0.1:7860",
-        "img_file": "E:\\GitHub_pro\\digital_human_video_player\\static\\imgs\\1.png",
+        "img_file": "E:\\GitHub_pro\\digital_human_video_player\\static\\imgs\\2.png",
         "preprocess": "crop",
         "still_mode": false,
-        "GFPGAN": false,
-        "batch_size": 2,
+        "GFPGAN": true,
+        "batch_size": 4,
         "face_model_resolution": 256,
         "pose_style": 0
     },

diff --git a/static/imgs/2.png b/static/imgs/2.png
diff --git a/static/index.html b/static/index.html
@@ -25,7 +25,7 @@
 
     // 视频播放队列
     let videoQueue = [];
-    let defaultVideo = './videos/1.mp4'; // 默认视频路径
+    let defaultVideo = './videos/2.mp4'; // 默认视频路径
     let currentVideo = video1; // 当前播放的视频元素
     let nextVideo = video2; // 下一个将要播放的视频元素
     let isWaitingForNextVideo = false; // 是否正在等待下一个视频加载

diff --git a/static/videos/2.mp4 b/static/videos/2.mp4
diff --git a/tests/sadtalker.py b/tests/sadtalker.py
@@ -1,6 +1,19 @@
 from gradio_client import Client
 
-client = Client("http://127.0.0.1:7860/")
+#client = Client("http://127.0.0.1:7860/")
+# result = client.predict(
+# 		"C:\\Users\\Administrator\\Pictures\\test\\1.png",	# filepath  in 'Source image' Image component
+# 		"C:\\Users\\Administrator\\Pictures\\test\\2.mp3",	# filepath  in 'Input audio' Audio component
+# 		"crop",	# Literal[crop, resize, full, extcrop, extfull]  in 'preprocess' Radio component
+# 		True,	# bool  in 'Still Mode (fewer head motion, works with preprocess `full`)' Checkbox component
+# 		True,	# bool  in 'GFPGAN as Face enhancer' Checkbox component
+# 		2,	# float (numeric value between 0 and 10) in 'batch size in generation' Slider component
+# 		256,	# Literal[256, 512]  in 'face model resolution' Radio component
+# 		0,	# float (numeric value between 0 and 46) in 'Pose style' Slider component
+# 		api_name="/test"
+# )
+
+client = Client("https://u373179-884c-4835511a.westc.gpuhub.com:8443/")
 result = client.predict(
 		"C:\\Users\\Administrator\\Pictures\\test\\1.png",	# filepath  in 'Source image' Image component
 		"C:\\Users\\Administrator\\Pictures\\test\\2.mp3",	# filepath  in 'Input audio' Audio component
@@ -10,6 +23,6 @@
 		2,	# float (numeric value between 0 and 10) in 'batch size in generation' Slider component
 		256,	# Literal[256, 512]  in 'face model resolution' Radio component
 		0,	# float (numeric value between 0 and 46) in 'Pose style' Slider component
-		api_name="/test"
+		fn_index=1
 )
 print(result)