diff --git a/addons/real-time-lip-sync-gd/lip_sync.gdns b/addons/real-time-lip-sync-gd/lip_sync.gdns
new file mode 100644
index 00000000..ec433630
--- /dev/null
+++ b/addons/real-time-lip-sync-gd/lip_sync.gdns
@@ -0,0 +1,8 @@
+[gd_resource type="NativeScript" load_steps=2 format=2]
+
+[ext_resource path="res://addons/real-time-lip-sync-gd/real_time_lip_sync.gdnlib" type="GDNativeLibrary" id=1]
+
+[resource]
+resource_name = "LipSync"
+class_name = "LipSync"
+library = ExtResource( 1 )
diff --git a/addons/real-time-lip-sync-gd/real_time_lip_sync.gdnlib b/addons/real-time-lip-sync-gd/real_time_lip_sync.gdnlib
new file mode 100644
index 00000000..e77f2cb8
--- /dev/null
+++ b/addons/real-time-lip-sync-gd/real_time_lip_sync.gdnlib
@@ -0,0 +1,14 @@
+[general]
+
+singleton=false
+load_once=true
+symbol_prefix="godot_"
+reloadable=true
+
+[entry]
+
+Windows.64="res://addons/real-time-lip-sync-gd/real_time_lip_sync_gd.dll"
+
+[dependencies]
+
+Windows.64=[ ]
diff --git a/addons/real-time-lip-sync-gd/real_time_lip_sync_gd.dll b/addons/real-time-lip-sync-gd/real_time_lip_sync_gd.dll
new file mode 100644
index 00000000..d24894ed
Binary files /dev/null and b/addons/real-time-lip-sync-gd/real_time_lip_sync_gd.dll differ
diff --git a/entities/vrm/VRMModel.gd b/entities/vrm/VRMModel.gd
index d6d7f913..d4ff3429 100644
--- a/entities/vrm/VRMModel.gd
+++ b/entities/vrm/VRMModel.gd
@@ -57,6 +57,11 @@ var u: ExpressionData
# TODO stopgap
var last_expression: ExpressionData
+var current_mouth_shape: ExpressionData
+const VOWEL_HISTORY: int = 5 # TODO move to config
+const MIN_VOWEL_CHANGE: int = 3 # TODO move to config
+var last_vowels: Array = []
+
var all_expressions: Dictionary = {} # String: ExpressionData
###############################################################################
@@ -79,6 +84,8 @@ func _ready() -> void:
# TODO stopgap
AppManager.sb.connect("blend_shapes", self, "_on_blend_shapes")
+ AppManager.sb.connect("lip_sync_updated", self, "_on_lip_sync_updated")
+
# Map expressions
var anim_player: AnimationPlayer = find_node("anim")
@@ -113,6 +120,8 @@ func _ready() -> void:
for key in all_expressions.keys():
set(key, all_expressions[key])
+
+ current_mouth_shape = a
_map_eye_expressions(all_expressions)
@@ -154,6 +163,53 @@ func _on_blend_shapes(value: String) -> void:
last_expression = ed
+func _on_lip_sync_updated(data: Dictionary) -> void:
+ for x in current_mouth_shape.morphs:
+ _modify_blend_shape(x.mesh, x.morph, 1)
+
+ last_vowels.push_back(data["vowel"])
+ if last_vowels.size() > VOWEL_HISTORY:
+ last_vowels.pop_front()
+
+ var vowel_count: Dictionary = {
+ "a": 0,
+ "i": 0,
+ "u": 0,
+ "e": 0,
+ "o": 0
+ }
+ for x in last_vowels:
+ match x:
+ 0: # A
+ vowel_count.a += 1
+ 1: # I
+ vowel_count.i += 1
+ 2: # U
+ vowel_count.u += 1
+ 3: # E
+ vowel_count.e += 1
+ 4: # O
+ vowel_count.o += 1
+
+ var last_shape = current_mouth_shape
+
+ if vowel_count.a >= MIN_VOWEL_CHANGE:
+ current_mouth_shape = a
+ elif vowel_count.i >= MIN_VOWEL_CHANGE:
+ current_mouth_shape = i
+ elif vowel_count.u >= MIN_VOWEL_CHANGE:
+ current_mouth_shape = u
+ elif vowel_count.e >= MIN_VOWEL_CHANGE:
+ current_mouth_shape = e
+ elif vowel_count.o >= MIN_VOWEL_CHANGE:
+ current_mouth_shape = o
+
+ if current_mouth_shape != last_shape:
+ for x in current_mouth_shape.morphs:
+ _modify_blend_shape(x.mesh, x.morph, 1)
+ for x in last_shape.morphs:
+ _modify_blend_shape(x.mesh, x.morph, 0)
+
###############################################################################
# Private functions #
###############################################################################
@@ -276,25 +332,25 @@ func custom_update(data, interpolation_data) -> void:
if (last_expression != joy and last_expression != sorrow):
# Left eye blinking
if data.left_eye_open >= blink_threshold:
- for i in blink_r.morphs:
- _modify_blend_shape(i.mesh, i.morph, i.values[1] - interpolation_data.interpolate(InterpolationData.InterpolationDataType.LEFT_EYE_BLINK, 1.0))
+ for x in blink_r.morphs:
+ _modify_blend_shape(x.mesh, x.morph, x.values[1] - interpolation_data.interpolate(InterpolationData.InterpolationDataType.LEFT_EYE_BLINK, 1.0))
else:
- for i in blink_r.morphs:
- _modify_blend_shape(i.mesh, i.morph, i.values[1])
+ for x in blink_r.morphs:
+ _modify_blend_shape(x.mesh, x.morph, x.values[1])
# Right eye blinking
if data.right_eye_open >= blink_threshold:
- for i in blink_l.morphs:
- _modify_blend_shape(i.mesh, i.morph, i.values[1] - interpolation_data.interpolate(InterpolationData.InterpolationDataType.RIGHT_EYE_BLINK, 1.0))
+ for x in blink_l.morphs:
+ _modify_blend_shape(x.mesh, x.morph, x.values[1] - interpolation_data.interpolate(InterpolationData.InterpolationDataType.RIGHT_EYE_BLINK, 1.0))
else:
- for i in blink_l.morphs:
- _modify_blend_shape(i.mesh, i.morph, i.values[1])
+ for x in blink_l.morphs:
+ _modify_blend_shape(x.mesh, x.morph, x.values[1])
else:
# Unblink if the facial expression doesn't allow blinking
- for i in blink_r.morphs:
- _modify_blend_shape(i.mesh, i.morph, i.values[0])
- for i in blink_l.morphs:
- _modify_blend_shape(i.mesh, i.morph, i.values[0])
+ for x in blink_r.morphs:
+ _modify_blend_shape(x.mesh, x.morph, x.values[0])
+ for x in blink_l.morphs:
+ _modify_blend_shape(x.mesh, x.morph, x.values[0])
# TODO eyes show weird behaviour when blinking
# TODO make sure angle between eyes' x values are at least parallel
@@ -340,10 +396,10 @@ func custom_update(data, interpolation_data) -> void:
skeleton.set_bone_pose(left_eye_id, right_eye_transform)
# Mouth tracking
- for i in a.morphs:
- _modify_blend_shape(i.mesh, i.morph,
- min(max(i.values[0], interpolation_data.interpolate(InterpolationData.InterpolationDataType.MOUTH_MOVEMENT, 2.0)),
- i.values[1]))
+ for x in current_mouth_shape.morphs:
+ _modify_blend_shape(x.mesh, x.morph,
+ min(max(x.values[0], interpolation_data.interpolate(InterpolationData.InterpolationDataType.MOUTH_MOVEMENT, 2.0)),
+ x.values[1]))
else:
# TODO implement eco mode, should be more efficient than standard mode
# Eco-mode blinking
diff --git a/project.godot b/project.godot
index 9ffa49f6..28ebb3bd 100644
--- a/project.godot
+++ b/project.godot
@@ -69,6 +69,11 @@ _global_script_classes=[ {
"language": "GDScript",
"path": "res://utils/JSONUtil.gd"
}, {
+"base": "Node",
+"class": "LipSyncManager",
+"language": "GDScript",
+"path": "res://utils/LipSyncManager.gd"
+}, {
"base": "Reference",
"class": "Logger",
"language": "GDScript",
@@ -122,6 +127,7 @@ _global_script_class_icons={
"GOTHGui": "",
"InterpolationData": "",
"JSONUtil": "",
+"LipSyncManager": "",
"Logger": "",
"MainScreen": "",
"ModelDisplayScreen": "",
@@ -138,6 +144,10 @@ config/name="OpenSeeFace GD"
run/main_scene="res://screens/MainScreen.tscn"
config/icon="res://assets/osfgd_icon.png"
+[audio]
+
+enable_audio_input=true
+
[autoload]
AppManager="*res://utils/AppManager.gd"
diff --git a/resources/gui/tracking.xml b/resources/gui/tracking.xml
index f65de578..817735d6 100644
--- a/resources/gui/tracking.xml
+++ b/resources/gui/tracking.xml
@@ -37,6 +37,8 @@
+
+
diff --git a/screens/MainScreen.gd b/screens/MainScreen.gd
index a2862af3..1c1993ea 100644
--- a/screens/MainScreen.gd
+++ b/screens/MainScreen.gd
@@ -39,7 +39,7 @@ func _ready() -> void:
AppManager.cm.metadata_config.apply_rendering_changes(get_viewport())
- AppManager.logger.notify("Welcome to openseeface-gd!")
+ AppManager.logger.notify("Press TAB to hide the UI")
func _unhandled_input(event: InputEvent) -> void:
if(event.is_action_pressed("ui_cancel") and OS.is_debug_build()):
diff --git a/screens/ModelDisplayScreen.gd b/screens/ModelDisplayScreen.gd
index 3b35a27b..f25563c3 100644
--- a/screens/ModelDisplayScreen.gd
+++ b/screens/ModelDisplayScreen.gd
@@ -78,6 +78,8 @@ func _ready() -> void:
AppManager.sb.connect(i, self, "_on_%s" % i)
set(i, AppManager.cm.current_model_config.get(i))
+ AppManager.sb.connect("lip_sync_updated", self, "_on_lip_sync_updated")
+
if model_resource_path:
_try_load_model(model_resource_path)
@@ -218,6 +220,10 @@ func _on_apply_rotation(value: bool) -> void:
func _on_should_track_eye(value: bool) -> void:
should_track_eye = value
+func _on_lip_sync_updated(_data: Dictionary) -> void:
+ interpolation_data.target_mouth_movement = 1
+ interpolation_data.interpolate(InterpolationData.InterpolationDataType.MOUTH_MOVEMENT, 0.8)
+
###############################################################################
# Private functions #
###############################################################################
@@ -226,6 +232,7 @@ func _try_load_model(file_path):
var dir := Directory.new()
if not dir.file_exists(file_path):
AppManager.logger.error("File path not found: %s" % file_path)
+ AppManager.logger.notify("File path not found: %s" % file_path)
return
match file_path.get_extension():
@@ -268,7 +275,7 @@ func _try_load_model(file_path):
rotation_adjustment = Vector3(-1, -1, 1)
AppManager.logger.info("TSCN file loaded successfully.")
_:
- AppManager.logger.info("File extension not recognized. %s" % file_path)
+ AppManager.logger.notify("File extension not recognized. %s" % file_path)
printerr("File extension not recognized. %s" % file_path)
# TODO probably incorrect?
diff --git a/screens/gui/Toast.gd b/screens/gui/Toast.gd
index 0f2a4ef9..4e040774 100644
--- a/screens/gui/Toast.gd
+++ b/screens/gui/Toast.gd
@@ -37,6 +37,10 @@ func _process(delta: float) -> void:
_on_close()
is_closing = true
+func _unhandled_input(event: InputEvent) -> void:
+ if event.is_action_pressed("toggle_gui"):
+ toast.visible = not toast.visible
+
###############################################################################
# Connections #
###############################################################################
diff --git a/utils/AppManager.gd b/utils/AppManager.gd
index 8f9a6588..2bb31bd4 100644
--- a/utils/AppManager.gd
+++ b/utils/AppManager.gd
@@ -7,6 +7,13 @@ const DYNAMIC_PHYSICS_BONES: bool = false
onready var sb: SignalBroadcaster = load("res://utils/SignalBroadcaster.gd").new()
onready var cm: ConfigManager = load("res://utils/ConfigManager.gd").new()
var nm: NotificationManager = load("res://utils/NotificationManager.gd").new()
+onready var lsm: LipSyncManager = load("res://utils/LipSyncManager.gd").new()
+# TODO clean this up with a stripped down implementation
+#onready var estimate_vowel = load("res://addons/godot-audio-processing/EstimateVowel.gd").new()
+#onready var rtls = load("res://addons/real-time-lip-sync-gd/lip_sync.gdns").new()
+#var effect
+#var buffer = 5
+
onready var logger: Logger = load("res://utils/Logger.gd").new()
# Debounce
@@ -36,8 +43,10 @@ func _ready() -> void:
cm.setup()
add_child(nm)
+ add_child(lsm)
func _process(delta: float) -> void:
+# rtls.poll()
if should_save:
debounce_counter += delta
if debounce_counter > DEBOUNCE_TIME:
diff --git a/utils/ConfigManager.gd b/utils/ConfigManager.gd
index 234b1bf7..f8ca1ff1 100644
--- a/utils/ConfigManager.gd
+++ b/utils/ConfigManager.gd
@@ -30,6 +30,9 @@ class Metadata:
# will always come as a String
var camera_index: String = "0"
+ # Lip sync
+ var use_lip_sync: bool = false
+
func load_from_json(json_string: String) -> bool:
var json_data = parse_json(json_string)
@@ -316,6 +319,8 @@ func _init() -> void:
AppManager.sb.connect("tracker_address", self, "_on_tracker_address")
AppManager.sb.connect("tracker_port", self, "_on_tracker_port")
+ AppManager.sb.connect("use_lip_sync", self, "_on_use_lip_sync")
+
# Features
AppManager.sb.connect("main_light", self, "_on_main_light")
@@ -411,6 +416,9 @@ func _on_tracker_address(value: String) -> void:
func _on_tracker_port(value: int) -> void:
current_model_config.tracker_port = value
+func _on_use_lip_sync(value: bool) -> void:
+ metadata_config.use_lip_sync = value
+
func _on_camera_select(camera_index: String) -> void:
metadata_config.camera_index = camera_index
diff --git a/utils/LipSyncManager.gd b/utils/LipSyncManager.gd
new file mode 100644
index 00000000..802ec79f
--- /dev/null
+++ b/utils/LipSyncManager.gd
@@ -0,0 +1,125 @@
+class_name LipSyncManager
+extends Node
+
+const LIP_SYNC: String = "res://addons/real-time-lip-sync-gd/lip_sync.gdns"
+const BUFFER: int = 1024
+const BUS_NAME: String = "Record"
+
+var lip_sync: Reference
+var aec: AudioEffectCapture
+var aes: AudioEffectSpectrumAnalyzerInstance
+var asp: AudioStreamPlayer
+
+###############################################################################
+# Builtin functions #
+###############################################################################
+
+func _ready() -> void:
+ connect("tree_exiting", self, "_on_tree_exiting")
+
+ lip_sync = load(LIP_SYNC).new()
+
+ lip_sync.connect("lip_sync_updated", self, "_on_lip_sync_updated")
+ lip_sync.connect("lip_sync_panicked", self, "_on_lip_sync_panicked")
+
+ var bus_index: int = AudioServer.bus_count
+ AudioServer.add_bus(bus_index)
+ AudioServer.set_bus_name(bus_index, BUS_NAME)
+ AudioServer.set_bus_mute(bus_index, true)
+
+ aec = AudioEffectCapture.new()
+	aec.buffer_length = 0.1 # buffer_length is in SECONDS, not frames; 1024 would request a ~17 min ring buffer
+ AudioServer.add_bus_effect(bus_index, aec)
+
+ var aesa := AudioEffectSpectrumAnalyzer.new()
+	aesa.buffer_length = 0.1 # seconds, not frames (property default is 2.0)
+ AudioServer.add_bus_effect(bus_index, aesa)
+ aes = AudioServer.get_bus_effect_instance(bus_index, 1)
+
+ asp = AudioStreamPlayer.new()
+ asp.bus = BUS_NAME
+ asp.stream = AudioStreamMicrophone.new()
+ add_child(asp)
+
+func _process(_delta: float) -> void:
+ lip_sync.poll()
+
+ if not AppManager.cm.metadata_config.use_lip_sync:
+ asp.stop()
+ return
+
+ if not asp.playing:
+ asp.play()
+
+	if aec.get_frames_available() >= BUFFER: # get_buffer_length_frames() is the fixed capacity, so the old check was always true
+ var volume = aes.get_magnitude_for_frequency_range(
+ 0,
+ 10000,
+ AudioEffectSpectrumAnalyzerInstance.MAGNITUDE_AVERAGE
+ ).length()
+
+		# debug print removed: get_buffer() CONSUMES frames, so printing here dropped 1024 frames each poll
+
+ if volume > 0.001: # TODO move to config
+ var audio_frames := aec.get_buffer(BUFFER)
+
+ var converted_sample: PoolRealArray = _to_sample(audio_frames)
+
+ lip_sync.update(converted_sample)
+
+ aec.clear_buffer()
+
+###############################################################################
+# Connections #
+###############################################################################
+
+func _on_tree_exiting() -> void:
+ lip_sync.shutdown()
+
+func _on_lip_sync_updated(data: Dictionary) -> void:
+ print(data)
+ AppManager.sb.broadcast_lip_sync_updated(data)
+
+func _on_lip_sync_panicked(message: String) -> void:
+ print(message)
+	# TODO: surface the panic to the user (e.g. AppManager.logger.error) instead of only printing
+
+###############################################################################
+# Private functions #
+###############################################################################
+
+static func _to_sample(data: PoolVector2Array) -> PoolRealArray:
+ var result := PoolRealArray()
+	# no pre-resize: push_back() appends, so resize(BUFFER * 2) would leave 2048 leading zeroes in the sample
+
+ for i in data:
+		var l = clamp(i.x * 32768, -32768, 32767)
+		result.push_back(l / 32768.0)
+
+		var r = clamp(i.y * 32768, -32768, 32767)
+		result.push_back(r / 32768.0)
+
+ return result
+
+# read mic input sample
+# reference (https://godotengine.org/qa/67091/how-to-read-audio-samples-as-1-1-floats)
+static func _read_16bit_samples(stream: PoolByteArray) -> PoolRealArray:
+ var samples = []
+ var i = 0
+ # Read by packs of 2 bytes
+ while i < len(stream):
+ var b0 = stream[i]
+ var b1 = stream[i + 1]
+ # Combine low bits and high bits to obtain 16-bit value
+ var u = b0 | (b1 << 8)
+ # Emulate signed to unsigned 16-bit conversion
+ u = (u + 32768) & 0xffff
+ # Convert to -1..1 range
+ var s = float(u - 32768) / 32768.0
+ samples.append(s)
+ i += 2
+ return PoolRealArray(samples)
+
+###############################################################################
+# Public functions #
+###############################################################################
diff --git a/utils/SignalBroadcaster.gd b/utils/SignalBroadcaster.gd
index 414796c2..ccd1c74c 100644
--- a/utils/SignalBroadcaster.gd
+++ b/utils/SignalBroadcaster.gd
@@ -152,6 +152,10 @@ signal toggle_tracker()
func broadcast_toggle_tracker() -> void:
emit_signal("toggle_tracker")
+signal use_lip_sync(value)
+func broadcast_use_lip_sync(value: bool) -> void:
+ emit_signal("use_lip_sync", value)
+
# TODO started in Tracking.gd, to VRMModel, this isn't great
signal blend_shapes(value)
func broadcast_blend_shapes(value: String) -> void:
@@ -288,6 +292,16 @@ signal model_loaded(model) # Used by model scripts to indicate when they are rea
func broadcast_model_loaded(model: BasicModel) -> void:
emit_signal("model_loaded", model)
+# Lip sync
+
+signal lip_sync_updated(data)
+func broadcast_lip_sync_updated(data: Dictionary) -> void:
+ emit_signal("lip_sync_updated", data)
+
+signal lip_sync_panicked(message)
+func broadcast_lip_sync_panicked(message: String) -> void:
+ emit_signal("lip_sync_panicked", message)
+
# Legacy gui
signal face_tracker_offsets_set()