Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvement 3: simplify queries #195

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,9 @@ const QString &Query::as_value() const {

std::string Query::as_string_repr() const {
std::string out = "";
if (!query_plan.empty()) {
if (ngram != std::nullopt) {
// Query is already after planning stage. Show low-level representation.
for (const auto &token : query_plan) {
out += fmt::format("[{:x}]", token.trigram);
}
return out;
return fmt::format("{:x}", ngram->trigram);
}
// No query plan yet. Show stringlike representation.
for (const auto &token : value) {
Expand Down Expand Up @@ -274,32 +271,55 @@ std::vector<PrimitiveQuery> plan_qstring(
}

// Produces a planned copy of this query for the given index types.
//
// - PRIMITIVE that already holds an ngram: returned unchanged (as a new
//   Query wrapping the same ngram).
// - PRIMITIVE that still holds a string value: expanded through
//   plan_qstring() into an AND of single-ngram primitive queries.
// - MIN_OF: children are planned; `1 of ...` is rewritten to OR and
//   `n of (1..n)` to AND, anything else stays MIN_OF with the same count.
// - AND / OR (including the rewritten MIN_OF cases): children are planned
//   and any direct child of the same type is spliced into the parent, so
//   the result has no directly nested AND-in-AND or OR-in-OR.
Query Query::plan(const std::unordered_set<IndexType> &types_to_query) const {
    if (type == QueryType::PRIMITIVE) {
        if (ngram != std::nullopt) {
            // Query is already as simple as possible.
            return Query(*ngram);
        }

        const auto grams = plan_qstring(types_to_query, value);
        std::vector<Query> plans;
        plans.reserve(grams.size());
        for (const auto &gram : grams) {
            // The temporary is built here on purpose: the ngram constructor
            // is private, so it cannot be invoked from inside emplace_back.
            plans.emplace_back(Query(gram));
        }
        return Query(QueryType::AND, std::move(plans));
    }

    std::vector<Query> plans;
    plans.reserve(queries.size());
    for (const auto &query : queries) {
        plans.emplace_back(query.plan(types_to_query));
    }

    // Special case `1 of ...` (OR) and `n of (1, 2, ... n)` (AND).
    // The children are already planned at this point, so instead of planning
    // a temporary OR/AND node again (which would rebuild the whole subtree)
    // we only switch the type and fall through to the flattening step below.
    QueryType planned_type = type;
    if (type == QueryType::MIN_OF) {
        if (count == 1) {
            planned_type = QueryType::OR;
        } else if (count == plans.size()) {
            planned_type = QueryType::AND;
        } else {
            return Query(count, std::move(plans));
        }
    }

    // AND / OR: splice children of the same type into this node.
    std::vector<Query> flattened;
    flattened.reserve(plans.size());
    for (auto &child : plans) {
        if (child.type == planned_type) {
            for (auto &subplan : child.queries) {
                flattened.emplace_back(std::move(subplan));
            }
        } else {
            flattened.emplace_back(std::move(child));
        }
    }
    return Query(planned_type, std::move(flattened));
}

QueryResult Query::run(const QueryPrimitive &primitive,
QueryCounters *counters) const {
// Case: primitive query - reduces to AND with tokens from query plan.
if (type == QueryType::PRIMITIVE) {
auto result = QueryResult::everything();
for (const auto &token : query_plan) {
result.do_and(primitive(token, counters), &counters->ands());
if (result.is_empty()) {
break;
}
}
return result;
return primitive(*ngram, counters);
}
// Case: and. Short circuits when result is already empty.
if (type == QueryType::AND) {
Expand Down
13 changes: 5 additions & 8 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ using QueryPrimitive =
// will actually be checked.
class Query {
private:
Query(const Query &other)
: type(other.type), query_plan(), count(other.count) {
Query(const Query &other) : type(other.type), ngram(), count(other.count) {
queries.reserve(other.queries.size());
for (const auto &query : other.queries) {
queries.emplace_back(query.clone());
Expand All @@ -51,10 +50,8 @@ class Query {
}
}

// Builds an already-planned PRIMITIVE query that wraps a single ngram.
// Initializers follow the member declaration order (type, value, ngram,
// count), which is the order in which they actually run. `count` is only
// meaningful for MIN_OF, but is zeroed so it is never read uninitialized.
explicit Query(PrimitiveQuery ngram)
    : type(QueryType::PRIMITIVE), value(), ngram(ngram), count(0) {}

public:
explicit Query(QString &&qstr);
Expand All @@ -78,8 +75,8 @@ class Query {
private:
QueryType type;
// used for QueryType::PRIMITIVE
QString value; // before plan()
std::vector<PrimitiveQuery> query_plan; // after plan()
QString value; // before plan()
std::optional<PrimitiveQuery> ngram; // after plan()
// used for QueryType::MIN_OF
uint32_t count;
// used for QueryType::AND/OR/MIN_OF
Expand Down