chore: Add input debounce feature for processing input changes

locaal-ai · Jul 22, 2024 · 812a86c · 812a86c
1 parent 85363d9
commit 812a86c
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 12 deletions.
diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini
@@ -9,3 +9,5 @@ Delete_Cached_Models="Delete Cached Models"
 Speed="Speed"
 Line_By_Line="Read Line By Line"
 line_by_line_help="If enabled, the input text or file will be read line by line, otherwise, the entire input text or file will be read at once."
+input_debounce_help="Enable waiting for input changes to end before the input text is processed. This is useful when typing or rapid changes appear in the input text or file."
+Input_Debounce="Input Debounce"
diff --git a/src/input-thread.cpp b/src/input-thread.cpp
@@ -12,7 +12,22 @@
 
 #include "plugin-support.h"
 
-InputThread::InputThread() : running(false), interval(1000) {}
+namespace {
+// Returns a monotonic timestamp in milliseconds.
+// The value is only meaningful when subtracted from another now_ms() reading:
+// steady_clock never jumps when the wall clock is adjusted (NTP sync, manual
+// change), so elapsed-time checks built on it cannot be skipped or repeated.
+uint64_t now_ms()
+{
+	const auto elapsed = std::chrono::steady_clock::now().time_since_epoch();
+	return static_cast<uint64_t>(
+		std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count());
+}
+} // namespace
+
+// Constructs a stopped input thread with an interval of 1000 and stamps both
+// the file and the text-source "last change" times with the current now_ms()
+// reading.
+InputThread::InputThread()
+	: running{false},
+	  interval{1000},
+	  lastChangeTimeFile{now_ms()},
+	  lastChangeTimeSource{now_ms()}
+{
+}
 
 void InputThread::run()
 {
@@ -40,9 +55,9 @@ void InputThread::run()
 				}
 			}
 			if (fileContents != lastFileValue) {
-				// Invoke speech generation if it has changed
 				new_content_for_generation = fileContents;
-				lastFileValue = fileContents;
+				this->lastFileValue = fileContents;
+				this->lastChangeTimeFile = now_ms();
 			}
 		}
 
@@ -59,15 +74,32 @@ void InputThread::run()
 						obs_data_get_string(sourceSettings, "text");
 					obs_data_release(sourceSettings);
 					if (text && lastOBSTextSourceValue != text) {
-						// Invoke speech generation if it has changed
 						new_content_for_generation = text;
-						lastOBSTextSourceValue = text;
+						this->lastOBSTextSourceValue = text;
+						this->lastChangeTimeSource = now_ms();
 					}
 				}
 				obs_source_release(source);
 			}
 		}
 
+		if (debounceMode == DebouceMode::Debounced) {
+			// If debounce mode is enabled, wait for a certain interval before
+			// generating speech
+			uint64_t currentTime = now_ms();
+			uint64_t timeSinceLastChangeFile = currentTime - lastChangeTimeFile;
+			uint64_t timeSinceLastChangeSource = currentTime - lastChangeTimeSource;
+			if (timeSinceLastChangeFile > interval &&
+			    timeSinceLastChangeFile < (interval * 2)) {
+				new_content_for_generation = lastFileValue;
+			} else if (timeSinceLastChangeSource > interval &&
+				   timeSinceLastChangeSource < (interval * 2)) {
+				new_content_for_generation = lastOBSTextSourceValue;
+			} else {
+				new_content_for_generation.clear();
+			}
+		}
+
 		if (!new_content_for_generation.empty() && speechGenerationCallback) {
 			std::thread generationThread([this, new_content_for_generation]() {
 				obs_log(LOG_DEBUG, "Generating speech from input: %s",

diff --git a/src/input-thread.h b/src/input-thread.h
@@ -8,6 +8,7 @@
 #include <functional>
 
 enum class ReadingMode { Whole, LineByLine };
+enum class DebouceMode { Debounced, Immediate }; // NOTE(review): "Debouce" looks like a typo for "Debounce"; a rename must update every use site
 
 class InputThread {
 public:
@@ -33,6 +34,7 @@ class InputThread {
 	void setFile(const std::string &filePath) { file = filePath; }
 	void setReadingMode(ReadingMode mode) { readingMode = mode; }
 	void setInterval(uint32_t milliseconds) { interval = milliseconds; }
+	void setDebounceMode(DebouceMode mode) { debounceMode = mode; }
 
 	void setOBSTextSource(const std::string &sourceName) { obsTextSource = sourceName; }
 
@@ -51,6 +53,10 @@ class InputThread {
 	std::string lastFileValue;
 	std::string lastOBSTextSourceValue;
 	ReadingMode readingMode = ReadingMode::Whole;
+	DebouceMode debounceMode = DebouceMode::Debounced;
+	uint64_t lastChangeTimeFile = 0;
+	uint64_t lastChangeTimeSource = 0;
+	bool debounceGenerated = false;
 
 	void run();
 };

diff --git a/src/squawk-source.cpp b/src/squawk-source.cpp
@@ -77,6 +77,7 @@ void squawk_source_defaults(obs_data_t *settings)
 	obs_data_set_default_string(settings, "file", "");
 	obs_data_set_default_bool(settings, "line_by_line", false);
 	obs_data_set_default_bool(settings, "phonetic_transcription", true);
+	obs_data_set_default_bool(settings, "input_debounce", true);
 }
 
 bool add_sources_to_list(void *list_property, obs_source_t *source)
@@ -144,25 +145,34 @@ obs_properties_t *squawk_source_properties(void *data)
 		data);
 
 	// add speaker id property
-	obs_properties_add_int(ppts, "speaker_id", MT_("Speaker_ID"), 0, 100, 1);
+	obs_properties_add_int(ppts, "speaker_id", MT_("Speaker_ID"), 0, 1000, 1);
 
 	// add a speed slider between 0.1 and 2.5
 	obs_properties_add_float_slider(ppts, "speed", MT_("Speed"), 0.1, 2.5, 0.1);
 
+	// add "inputs" group
+	obs_properties_t *inputs_group = obs_properties_create();
+	obs_properties_add_group(ppts, "inputs", MT_("Inputs"), OBS_GROUP_NORMAL, inputs_group);
 	// add input source selection dropdown property
-	obs_property_t *input_source = obs_properties_add_list(
-		ppts, "input_source", "Input Source", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
+	obs_property_t *input_source = obs_properties_add_list(inputs_group, "input_source",
+							       "Input Source", OBS_COMBO_TYPE_LIST,
+							       OBS_COMBO_FORMAT_STRING);
 	// Add "none" option
 	obs_property_list_add_string(input_source, MT_("none_no_input"), "none");
 	// Add text sources
 	obs_enum_sources(add_sources_to_list, input_source);
 	// add file property
-	obs_properties_add_path(ppts, "file", MT_("File"), OBS_PATH_FILE, nullptr, nullptr);
+	obs_properties_add_path(inputs_group, "file", MT_("File"), OBS_PATH_FILE, nullptr, nullptr);
 	// add line-by-line boolean property
-	obs_properties_add_bool(ppts, "line_by_line", MT_("Line_By_Line"));
+	obs_property_t *lbl_prop =
+		obs_properties_add_bool(inputs_group, "line_by_line", MT_("Line_By_Line"));
 	// add help text for line-by-line
-	obs_property_set_long_description(obs_properties_get(ppts, "line_by_line"),
-					  MT_("line_by_line_help"));
+	obs_property_set_long_description(lbl_prop, MT_("line_by_line_help"));
+	// add boolean property for enabling input debounce
+	obs_property_t *debouce_prop =
+		obs_properties_add_bool(inputs_group, "input_debounce", MT_("Input_Debounce"));
+	// add help text for input debounce
+	obs_property_set_long_description(debouce_prop, MT_("input_debounce_help"));
 
 	// add text property
 	obs_properties_add_text(ppts, "text", MT_("Text"), OBS_TEXT_DEFAULT);
@@ -245,6 +255,9 @@ void squawk_source_update(void *data, obs_data_t *settings)
 	squawk_data->inputThread->setReadingMode(obs_data_get_bool(settings, "line_by_line")
 							 ? ReadingMode::LineByLine
 							 : ReadingMode::Whole);
+	squawk_data->inputThread->setDebounceMode(obs_data_get_bool(settings, "input_debounce")
+							  ? DebouceMode::Debounced
+							  : DebouceMode::Immediate);
 
 	std::string new_model_name = obs_data_get_string(settings, "model");
 	if (new_model_name != squawk_data->tts_context.model_name) {