From 623ce94ef6782a91a4fa96976437e88a253e4766 Mon Sep 17 00:00:00 2001 From: msm Date: Wed, 14 Dec 2022 23:24:06 +0100 Subject: [PATCH 1/3] Performance improvement 3: simplify minof expressions --- libursa/Query.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libursa/Query.cpp b/libursa/Query.cpp index 962b8d6..8f932f5 100644 --- a/libursa/Query.cpp +++ b/libursa/Query.cpp @@ -280,6 +280,11 @@ Query Query::plan(const std::unordered_set &types_to_query) const { plans.emplace_back(query.plan(types_to_query)); } if (type == QueryType::MIN_OF) { + if (count == 1) { + return Query(QueryType::OR, std::move(plans)); + } else if (count == plans.size()) { + return Query(QueryType::AND, std::move(plans)); + } return Query(count, std::move(plans)); } return Query(type, std::move(plans)); From 717573caa56eb621e368a9c949759c932dbb745d Mon Sep 17 00:00:00 2001 From: msm Date: Thu, 15 Dec 2022 02:04:55 +0100 Subject: [PATCH 2/3] Expand on the idea, also optimize ANDs and ORs --- libursa/Query.cpp | 63 +++++++++++++++++++++++++++++------------------ libursa/Query.h | 13 ++++------ 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/libursa/Query.cpp b/libursa/Query.cpp index 8f932f5..7c719b2 100644 --- a/libursa/Query.cpp +++ b/libursa/Query.cpp @@ -69,12 +69,9 @@ const QString &Query::as_value() const { std::string Query::as_string_repr() const { std::string out = ""; - if (!query_plan.empty()) { + if (ngram != std::nullopt) { // Query is already after planning stage. Show low-level representation. - for (const auto &token : query_plan) { - out += fmt::format("[{:x}]", token.trigram); - } - return out; + return fmt::format("{:x}", ngram->trigram); } // No query plan yet. Show stringlike representation. for (const auto &token : value) { @@ -274,23 +271,47 @@ std::vector plan_qstring( } Query Query::plan(const std::unordered_set &types_to_query) const { - if (type != QueryType::PRIMITIVE) { + if (type == QueryType::PRIMITIVE) { + if (ngram != std::nullopt) { + // Query already as simple as possible + return Query(*ngram); + } + + auto ngrams = plan_qstring(types_to_query, value); std::vector plans; - for (const auto &query : queries) { - plans.emplace_back(query.plan(types_to_query)); + for (const auto gram : ngrams) { + plans.emplace_back(Query(gram)); } - if (type == QueryType::MIN_OF) { - if (count == 1) { - return Query(QueryType::OR, std::move(plans)); - } else if (count == plans.size()) { - return Query(QueryType::AND, std::move(plans)); - } - return Query(count, std::move(plans)); + return Query(QueryType::AND, std::move(plans)); + } + + std::vector plans; + for (const auto &query : queries) { + plans.emplace_back(query.plan(types_to_query)); + } + + if (type == QueryType::MIN_OF) { + if (count == 1) { + return Query(QueryType::OR, std::move(plans)).plan(types_to_query); + } + if (count == plans.size()) { + return Query(QueryType::AND, std::move(plans)).plan(types_to_query); } - return Query(type, std::move(plans)); + return Query(count, std::move(plans)); } - return Query(plan_qstring(types_to_query, value)); + // For all other types (AND and OR), rewrite and simplify recursively + std::vector new_plans; + for (auto it = plans.begin(); it != plans.end(); it++) { + if (it->type == type) { + for (auto &subplan : it->queries) { + new_plans.emplace_back(std::move(subplan)); + } + } else { + new_plans.emplace_back(std::move(*it)); + } + } + return Query(type, std::move(new_plans)); } QueryResult Query::run(const QueryPrimitive &primitive, @@ -298,13 +319,7 @@ QueryResult Query::run(const QueryPrimitive &primitive, // Case: primitive query - reduces to AND with tokens from query plan. if (type == QueryType::PRIMITIVE) { auto result = QueryResult::everything(); - for (const auto &token : query_plan) { - result.do_and(primitive(token, counters), &counters->ands()); - if (result.is_empty()) { - break; - } - } - return result; + return primitive(*ngram, counters); } // Case: and. Short circuits when result is already empty. if (type == QueryType::AND) { diff --git a/libursa/Query.h b/libursa/Query.h index 3c02e7e..0a602c9 100644 --- a/libursa/Query.h +++ b/libursa/Query.h @@ -39,8 +39,7 @@ using QueryPrimitive = // will actually be checked. class Query { private: - Query(const Query &other) - : type(other.type), query_plan(), count(other.count) { + Query(const Query &other) : type(other.type), ngram(), count(other.count) { queries.reserve(other.queries.size()); for (const auto &query : other.queries) { queries.emplace_back(query.clone()); @@ -51,10 +50,8 @@ class Query { } } - explicit Query(std::vector &&query_plan) - : type(QueryType::PRIMITIVE), - query_plan(std::move(query_plan)), - value() {} + explicit Query(PrimitiveQuery ngram) + : type(QueryType::PRIMITIVE), ngram(ngram), value() {} public: explicit Query(QString &&qstr); @@ -78,8 +75,8 @@ class Query { private: QueryType type; // used for QueryType::PRIMITIVE - QString value; // before plan() - std::vector query_plan; // after plan() + QString value; // before plan() + std::optional ngram; // after plan() // used for QueryType::MIN_OF uint32_t count; // used for QueryType::AND/OR/MIN_OF From f434d34ac5291911fe71cbcbf4ab2f8a6180e9bd Mon Sep 17 00:00:00 2001 From: msm Date: Thu, 15 Dec 2022 17:26:22 +0100 Subject: [PATCH 3/3] A few comments here and there --- libursa/Query.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libursa/Query.cpp b/libursa/Query.cpp index 7c719b2..26d1f43 100644 --- a/libursa/Query.cpp +++ b/libursa/Query.cpp @@ -277,9 +277,8 @@ Query Query::plan(const std::unordered_set &types_to_query) const { return Query(*ngram); } - auto ngrams = plan_qstring(types_to_query, value); std::vector plans; - for (const auto gram : ngrams) { + for (const auto gram : plan_qstring(types_to_query, value)) { plans.emplace_back(Query(gram)); } return Query(QueryType::AND, std::move(plans)); @@ -290,6 +289,7 @@ Query Query::plan(const std::unordered_set &types_to_query) const { plans.emplace_back(query.plan(types_to_query)); } + // Special case `1 of ...` (OR) and `n of (1, 2, ... n)` (AND). if (type == QueryType::MIN_OF) { if (count == 1) { return Query(QueryType::OR, std::move(plans)).plan(types_to_query); @@ -300,7 +300,7 @@ Query Query::plan(const std::unordered_set &types_to_query) const { return Query(count, std::move(plans)); } - // For all other types (AND and OR), rewrite and simplify recursively + // For all other types (AND and OR), flatten and simplify recursively std::vector new_plans; for (auto it = plans.begin(); it != plans.end(); it++) { if (it->type == type) {