Skip to content

Commit

Permalink
chore: Add input debounce feature for processing input changes
Browse files Browse the repository at this point in the history
  • Loading branch information
royshil committed Jul 22, 2024
1 parent 85363d9 commit 812a86c
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 12 deletions.
2 changes: 2 additions & 0 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ Delete_Cached_Models="Delete Cached Models"
Speed="Speed"
Line_By_Line="Read Line By Line"
line_by_line_help="If enabled, the input text or file will be read line by line, otherwise, the entire input text or file will be read at once."
input_debounce_help="Enable waiting for input changes to end before the input text is processed. This is useful when typing or rapid changes appear in the input text or file."
Input_Debounce="Input Debounce"
42 changes: 37 additions & 5 deletions src/input-thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,22 @@

#include "plugin-support.h"

// Creates the input thread object in the not-running state with an interval
// of 1000 (milliseconds, the unit used by setInterval).
InputThread::InputThread()
	: running(false),
	  interval(1000)
{
}
namespace {
/**
 * Returns a monotonic timestamp in milliseconds.
 *
 * The value is only meaningful relative to another value returned by this
 * function (for example, to compute how long ago an input last changed).
 * It is read from std::chrono::steady_clock rather than the wall clock so
 * that system time adjustments (NTP sync, manual clock changes) cannot make
 * the measured elapsed time jump or go backwards.
 *
 * @return Milliseconds elapsed since the steady clock's (unspecified) epoch.
 */
uint64_t now_ms()
{
	const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
	return static_cast<uint64_t>(
		std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch).count());
}
} // namespace

// Creates the input thread object in the not-running state with an interval
// of 1000 (milliseconds, the unit used by setInterval). Both "last change"
// timestamps start at the current now_ms() value, so the time since the last
// change is measured from construction until a real change is recorded.
InputThread::InputThread()
	: running(false), interval(1000), lastChangeTimeFile(now_ms()), lastChangeTimeSource(now_ms())
{
}

void InputThread::run()
{
Expand Down Expand Up @@ -40,9 +55,9 @@ void InputThread::run()
}
}
if (fileContents != lastFileValue) {
// Invoke speech generation if it has changed
new_content_for_generation = fileContents;
lastFileValue = fileContents;
this->lastFileValue = fileContents;
this->lastChangeTimeFile = now_ms();
}
}

Expand All @@ -59,15 +74,32 @@ void InputThread::run()
obs_data_get_string(sourceSettings, "text");
obs_data_release(sourceSettings);
if (text && lastOBSTextSourceValue != text) {
// Invoke speech generation if it has changed
new_content_for_generation = text;
lastOBSTextSourceValue = text;
this->lastOBSTextSourceValue = text;
this->lastChangeTimeSource = now_ms();
}
}
obs_source_release(source);
}
}

if (debounceMode == DebouceMode::Debounced) {
// If debounce mode is enabled, wait for a certain interval before
// generating speech
uint64_t currentTime = now_ms();
uint64_t timeSinceLastChangeFile = currentTime - lastChangeTimeFile;
uint64_t timeSinceLastChangeSource = currentTime - lastChangeTimeSource;
if (timeSinceLastChangeFile > interval &&
timeSinceLastChangeFile < (interval * 2)) {
new_content_for_generation = lastFileValue;
} else if (timeSinceLastChangeSource > interval &&
timeSinceLastChangeSource < (interval * 2)) {
new_content_for_generation = lastOBSTextSourceValue;
} else {
new_content_for_generation.clear();
}
}

if (!new_content_for_generation.empty() && speechGenerationCallback) {
std::thread generationThread([this, new_content_for_generation]() {
obs_log(LOG_DEBUG, "Generating speech from input: %s",
Expand Down
6 changes: 6 additions & 0 deletions src/input-thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <functional>

// How the input text or file is consumed: the entire input at once (Whole)
// or one line at a time (LineByLine).
enum class ReadingMode {
	Whole,
	LineByLine,
};
// Whether changed input is handed to speech generation as soon as the change
// is seen (Immediate) or only after it has stopped changing for a while
// (Debounced).
// NOTE: the type name is misspelled ("Debouce"). It is kept unchanged because
// existing code refers to it; new code should prefer the correctly spelled
// DebounceMode alias declared right below.
enum class DebouceMode { Debounced, Immediate };
using DebounceMode = DebouceMode;

class InputThread {
public:
Expand All @@ -33,6 +34,7 @@ class InputThread {
// Stores the path of the file used as input.
void setFile(const std::string &filePath)
{
	file = filePath;
}
// Selects whether the input is read whole or line by line.
void setReadingMode(ReadingMode mode)
{
	readingMode = mode;
}
// Stores the interval, given in milliseconds. The debounce check in run()
// compares the time since the last input change against this value.
void setInterval(uint32_t milliseconds)
{
	interval = milliseconds;
}
// Selects whether input changes are debounced or processed immediately.
void setDebounceMode(DebouceMode mode)
{
	debounceMode = mode;
}

// Stores the name of the OBS text source whose text is used as input.
void setOBSTextSource(const std::string &sourceName)
{
	obsTextSource = sourceName;
}

Expand All @@ -51,6 +53,10 @@ class InputThread {
// Most recent file contents seen by run(); compared against freshly read
// contents to detect a change.
std::string lastFileValue;
// Most recent text read from the OBS text source; compared against the
// current text to detect a change.
std::string lastOBSTextSourceValue;
// Defaults to reading the whole input at once.
ReadingMode readingMode = ReadingMode::Whole;
// Debouncing is enabled by default.
DebouceMode debounceMode = DebouceMode::Debounced;
// Millisecond timestamps of the most recent detected change of the file
// contents and of the OBS text source text. The 0 defaults are overwritten
// with the current time by the constructor in input-thread.cpp.
uint64_t lastChangeTimeFile = 0;
uint64_t lastChangeTimeSource = 0;
// NOTE(review): not referenced in the visible parts of input-thread.cpp;
// confirm whether this flag is still needed.
bool debounceGenerated = false;

void run();
};
Expand Down
27 changes: 20 additions & 7 deletions src/squawk-source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ void squawk_source_defaults(obs_data_t *settings)
obs_data_set_default_string(settings, "file", "");
obs_data_set_default_bool(settings, "line_by_line", false);
obs_data_set_default_bool(settings, "phonetic_transcription", true);
obs_data_set_default_bool(settings, "input_debounce", true);
}

bool add_sources_to_list(void *list_property, obs_source_t *source)
Expand Down Expand Up @@ -144,25 +145,34 @@ obs_properties_t *squawk_source_properties(void *data)
data);

// add speaker id property
obs_properties_add_int(ppts, "speaker_id", MT_("Speaker_ID"), 0, 100, 1);
obs_properties_add_int(ppts, "speaker_id", MT_("Speaker_ID"), 0, 1000, 1);

// add a speed slider between 0.1 and 2.5
obs_properties_add_float_slider(ppts, "speed", MT_("Speed"), 0.1, 2.5, 0.1);

// add "inputs" group
obs_properties_t *inputs_group = obs_properties_create();
obs_properties_add_group(ppts, "inputs", MT_("Inputs"), OBS_GROUP_NORMAL, inputs_group);
// add input source selection dropdown property
obs_property_t *input_source = obs_properties_add_list(
ppts, "input_source", "Input Source", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
obs_property_t *input_source = obs_properties_add_list(inputs_group, "input_source",
"Input Source", OBS_COMBO_TYPE_LIST,
OBS_COMBO_FORMAT_STRING);
// Add "none" option
obs_property_list_add_string(input_source, MT_("none_no_input"), "none");
// Add text sources
obs_enum_sources(add_sources_to_list, input_source);
// add file property
obs_properties_add_path(ppts, "file", MT_("File"), OBS_PATH_FILE, nullptr, nullptr);
obs_properties_add_path(inputs_group, "file", MT_("File"), OBS_PATH_FILE, nullptr, nullptr);
// add line-by-line boolean property
obs_properties_add_bool(ppts, "line_by_line", MT_("Line_By_Line"));
obs_property_t *lbl_prop =
obs_properties_add_bool(inputs_group, "line_by_line", MT_("Line_By_Line"));
// add help text for line-by-line
obs_property_set_long_description(obs_properties_get(ppts, "line_by_line"),
MT_("line_by_line_help"));
obs_property_set_long_description(lbl_prop, MT_("line_by_line_help"));
// add boolean property for enabling input debounce
obs_property_t *debouce_prop =
obs_properties_add_bool(inputs_group, "input_debounce", MT_("Input_Debounce"));
// add help text for input debounce
obs_property_set_long_description(debouce_prop, MT_("input_debounce_help"));

// add text property
obs_properties_add_text(ppts, "text", MT_("Text"), OBS_TEXT_DEFAULT);
Expand Down Expand Up @@ -245,6 +255,9 @@ void squawk_source_update(void *data, obs_data_t *settings)
squawk_data->inputThread->setReadingMode(obs_data_get_bool(settings, "line_by_line")
? ReadingMode::LineByLine
: ReadingMode::Whole);
squawk_data->inputThread->setDebounceMode(obs_data_get_bool(settings, "input_debounce")
? DebouceMode::Debounced
: DebouceMode::Immediate);

std::string new_model_name = obs_data_get_string(settings, "model");
if (new_model_name != squawk_data->tts_context.model_name) {
Expand Down

0 comments on commit 812a86c

Please sign in to comment.