From a560dffa321af2370454ffe54581ec2f1aedbe5d Mon Sep 17 00:00:00 2001 From: Qijia Liu Date: Mon, 7 Aug 2023 00:46:04 -0400 Subject: [PATCH] perf: replace rbtree with vector of pair (#684) --- src/rime/dict/entry_collector.cc | 10 ++++++++-- src/rime/dict/entry_collector.h | 6 ++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/rime/dict/entry_collector.cc b/src/rime/dict/entry_collector.cc index fe864fb31e..cdb7625c5e 100644 --- a/src/rime/dict/entry_collector.cc +++ b/src/rime/dict/entry_collector.cc @@ -4,7 +4,9 @@ // // 2011-11-27 GONG Chen // +#include #include +#include #include #include #include @@ -192,12 +194,14 @@ void EntryCollector::CreateEntry(const string& word, bool is_word = (e->raw_code.size() == 1); if (is_word) { auto& weights = words[e->text]; - if (weights.find(code_str) != weights.end()) { + if (std::find_if(weights.begin(), weights.end(), [&](const auto& p) { + return p.first == code_str; + }) != weights.end()) { LOG(WARNING) << "duplicate word definition '" << e->text << "': [" << code_str << "]."; return; } - weights[code_str] += e->weight; + weights.push_back(std::make_pair(code_str, e->weight)); total_weight[e->text] += e->weight; } entries.emplace_back(std::move(e)); @@ -214,6 +218,8 @@ bool EntryCollector::TranslateWord(const string& word, vector* result) { } const auto& w = words.find(word); if (w != words.end()) { + std::sort(w->second.begin(), w->second.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); for (const auto& v : w->second) { const double kMinimalWeight = 0.05; // 5% double min_weight = total_weight[word] * kMinimalWeight; diff --git a/src/rime/dict/entry_collector.h b/src/rime/dict/entry_collector.h index b7b6105709..17f7267986 100644 --- a/src/rime/dict/entry_collector.h +++ b/src/rime/dict/entry_collector.h @@ -23,8 +23,10 @@ struct RawDictEntry { // code -> weight using WeightMap = map; -// word -> { code -> weight } -using WordMap = hash_map; +// word -> [ { code, weight } 
] +// For the sake of memory usage, don't use word -> { code -> weight } as there +// may be many words, but may not be many representations for a word +using WordMap = hash_map<string, vector<pair<string, double>>>; // [ (word, weight), ... ] using EncodeQueue = std::queue<pair<string, string>>;