From b84eaad74856848406ab9d9ff06863b3186c78e8 Mon Sep 17 00:00:00 2001 From: Nikhil Tanwar <2002nikhiltanwar@gmail.com> Date: Sun, 14 Aug 2022 22:11:24 +0530 Subject: [PATCH 1/6] Extract parseQuery() Extracts the duplicate code from publisherQuery() and creatorQuery() into a new function parseQuery(). --- src/library.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 07eb428cb..c5f4bce3b 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -521,6 +521,16 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) return queryParser.parse_query(normalizeText(filter.getQuery()), flags); } +Xapian::Query parseQuery(const std::string& query, const std::string& prefix) +{ + Xapian::QueryParser queryParser; + queryParser.set_default_op(Xapian::Query::OP_OR); + queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); + const auto flags = 0; + const auto q = queryParser.parse_query(normalizeText(query), flags, prefix); + return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length()); +} + Xapian::Query nameQuery(const std::string& name) { return Xapian::Query("XN" + normalizeText(name)); @@ -538,22 +548,12 @@ Xapian::Query langQuery(const std::string& lang) Xapian::Query publisherQuery(const std::string& publisher) { - Xapian::QueryParser queryParser; - queryParser.set_default_op(Xapian::Query::OP_OR); - queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); - const auto flags = 0; - const auto q = queryParser.parse_query(normalizeText(publisher), flags, "XP"); - return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length()); + return parseQuery(publisher, "XP"); } Xapian::Query creatorQuery(const std::string& creator) { - Xapian::QueryParser queryParser; - queryParser.set_default_op(Xapian::Query::OP_OR); - queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); - 
const auto flags = 0; - const auto q = queryParser.parse_query(normalizeText(creator), flags, "A"); - return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length()); + return parseQuery(creator, "A"); } Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& rejectTags) From 48a0b3bdc7debb436a5e0c933c8cd7b8284962c2 Mon Sep 17 00:00:00 2001 From: Nikhil Tanwar <2002nikhiltanwar@gmail.com> Date: Mon, 15 Aug 2022 20:17:46 +0530 Subject: [PATCH 2/6] Add catalog filtering using ZIM aliasname Adds mechanism to get a ZIM using its alias name. To make a search, one needs to visit: `TLD/?book=aliasNameHere` --- include/library.h | 5 +++++ src/library.cpp | 25 +++++++++++++++++++++++++ src/server/internalServer.cpp | 3 +++ 3 files changed, 33 insertions(+) diff --git a/include/library.h b/include/library.h index 87b0315ea..a663a9b3f 100644 --- a/include/library.h +++ b/include/library.h @@ -67,6 +67,7 @@ class Filter { std::string _query; bool _queryIsPartial; std::string _name; + std::string _aliasName; public: // functions Filter(); @@ -112,6 +113,7 @@ class Filter { Filter& maxSize(size_t size); Filter& query(std::string query, bool partial=true); Filter& name(std::string name); + Filter& aliasName(std::string aliasName); bool hasQuery() const; const std::string& getQuery() const { return _query; } @@ -135,6 +137,9 @@ class Filter { const Tags& getAcceptTags() const { return _acceptTags; } const Tags& getRejectTags() const { return _rejectTags; } + bool hasAliasName() const; + const std::string& getAliasName() const { return _aliasName; } + private: // functions friend class Library; diff --git a/src/library.cpp b/src/library.cpp index c5f4bce3b..88fdd880e 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -461,6 +461,9 @@ void Library::updateBookDB(const Book& book) indexer.index_text(normalizeText(book.getPublisher()), 1, "XP"); indexer.index_text(normalizeText(book.getName()), 1, "XN"); 
indexer.index_text(normalizeText(book.getCategory()), 1, "XC"); + const auto bookName = book.getHumanReadableIdFromPath(); + const auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$"); + indexer.index_text(normalizeText(aliasName), 1, "XF"); for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) { doc.add_boolean_term("XT" + tag); @@ -505,6 +508,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) queryParser.add_prefix("publisher", "XP"); queryParser.add_prefix("creator", "A"); queryParser.add_prefix("tag", "XT"); + queryParser.add_prefix("filename", "XF"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -541,6 +545,11 @@ Xapian::Query categoryQuery(const std::string& category) return Xapian::Query("XC" + normalizeText(category)); } +Xapian::Query aliasNameQuery(const std::string& fileName) +{ + return parseQuery(fileName, "XF"); +} + Xapian::Query langQuery(const std::string& lang) { return Xapian::Query("L" + normalizeText(lang)); @@ -593,6 +602,9 @@ Xapian::Query buildXapianQuery(const Filter& filter) const auto tq = tagsQuery(filter.getAcceptTags(), filter.getRejectTags()); q = Xapian::Query(Xapian::Query::OP_AND, q, tq);; } + if ( filter.hasAliasName() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, aliasNameQuery(filter.getAliasName())); + } return q; } @@ -742,6 +754,7 @@ enum filterTypes { QUERY = FLAG(12), NAME = FLAG(13), CATEGORY = FLAG(14), + ALIASNAME = FLAG(15), }; Filter& Filter::local(bool accept) @@ -844,6 +857,13 @@ Filter& Filter::name(std::string name) return *this; } +Filter& Filter::aliasName(std::string aliasName) +{ + _aliasName = aliasName; + activeFilters |= ALIASNAME; + return *this; +} + #define ACTIVE(X) (activeFilters & (X)) #define FILTER(TAG, TEST) if (ACTIVE(TAG) && !(TEST)) { return false; } bool Filter::hasQuery() const @@ -856,6 +876,11 @@ bool Filter::hasName() const return ACTIVE(NAME); } +bool Filter::hasAliasName() 
const +{ + return ACTIVE(ALIASNAME); +} + bool Filter::hasCategory() const { return ACTIVE(CATEGORY); diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index c2c40cf5a..5e3ab598e 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -119,6 +119,9 @@ Filter get_search_filter(const RequestContext& request, const std::string& prefi try { filter.rejectTags(kiwix::split(request.get_argument(prefix+"notag"), ";")); } catch (...) {} + try { + filter.aliasName(request.get_argument(prefix + "book")); + } catch (...) {} return filter; } From bd38ea97f9137936b7a5aec0a329b7ff5b63f826 Mon Sep 17 00:00:00 2001 From: Nikhil Tanwar <2002nikhiltanwar@gmail.com> Date: Mon, 15 Aug 2022 22:12:47 +0530 Subject: [PATCH 3/6] Multivalue support for book query Adds support for passing multiple `book` query parameters. --- include/library.h | 8 ++++---- src/library.cpp | 28 +++++++++++++++------------- src/server/internalServer.cpp | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/library.h b/include/library.h index a663a9b3f..8ac4fb66a 100644 --- a/include/library.h +++ b/include/library.h @@ -54,6 +54,7 @@ enum supportedListMode { class Filter { public: // types using Tags = std::vector<std::string>; + using AliasNames = std::vector<std::string>; private: // data uint64_t activeFilters; @@ -67,7 +68,7 @@ class Filter { std::string _query; bool _queryIsPartial; std::string _name; - std::string _aliasName; + AliasNames _aliasNames; public: // functions Filter(); @@ -113,7 +114,7 @@ class Filter { Filter& maxSize(size_t size); Filter& query(std::string query, bool partial=true); Filter& name(std::string name); - Filter& aliasName(std::string aliasName); + Filter& aliasNames(const AliasNames& aliasNames); bool hasQuery() const; const std::string& getQuery() const { return _query; } @@ -137,8 +138,7 @@ class Filter { const Tags& getAcceptTags() const { return _acceptTags; } const Tags& getRejectTags() const { return _rejectTags; } - bool 
hasAliasName() const; - const std::string& getAliasName() const { return _aliasName; } + const AliasNames& getAliasNames() const { return _aliasNames; } private: // functions friend class Library; diff --git a/src/library.cpp b/src/library.cpp index 88fdd880e..e45d1d488 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -545,9 +545,16 @@ Xapian::Query categoryQuery(const std::string& category) return Xapian::Query("XC" + normalizeText(category)); } -Xapian::Query aliasNameQuery(const std::string& fileName) +Xapian::Query aliasNamesQuery(const Filter::AliasNames& aliasNames) { - return parseQuery(fileName, "XF"); + Xapian::Query q = Xapian::Query(std::string()); + std::vector<Xapian::Query> queryVec; + for (const auto& aliasName : aliasNames) { + queryVec.push_back(parseQuery(aliasName, "XF")); + } + Xapian::Query combinedQuery(Xapian::Query::OP_OR, queryVec.begin(), queryVec.end()); + q = Xapian::Query(Xapian::Query::OP_FILTER, q, combinedQuery); + return q; } Xapian::Query langQuery(const std::string& lang) @@ -602,8 +609,8 @@ Xapian::Query buildXapianQuery(const Filter& filter) const auto tq = tagsQuery(filter.getAcceptTags(), filter.getRejectTags()); q = Xapian::Query(Xapian::Query::OP_AND, q, tq);; } - if ( filter.hasAliasName() ) { - q = Xapian::Query(Xapian::Query::OP_AND, q, aliasNameQuery(filter.getAliasName())); + if ( !filter.getAliasNames().empty() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, aliasNamesQuery(filter.getAliasNames())); } return q; } @@ -754,7 +761,7 @@ enum filterTypes { QUERY = FLAG(12), NAME = FLAG(13), CATEGORY = FLAG(14), - ALIASNAME = FLAG(15), + ALIASNAMES = FLAG(15), }; Filter& Filter::local(bool accept) @@ -857,10 +864,10 @@ Filter& Filter::name(std::string name) return *this; } -Filter& Filter::aliasName(std::string aliasName) +Filter& Filter::aliasNames(const AliasNames& aliasNames) { - _aliasName = aliasName; - activeFilters |= ALIASNAME; + _aliasNames = aliasNames; + activeFilters |= ALIASNAMES; return *this; } @@ -876,11 +883,6 @@ 
bool Filter::hasName() const return ACTIVE(NAME); } -bool Filter::hasAliasName() const -{ - return ACTIVE(ALIASNAME); -} - bool Filter::hasCategory() const { return ACTIVE(CATEGORY); diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 5e3ab598e..5aa1b53ee 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -120,7 +120,7 @@ Filter get_search_filter(const RequestContext& request, const std::string& prefi filter.rejectTags(kiwix::split(request.get_argument(prefix+"notag"), ";")); } catch (...) {} try { - filter.aliasName(request.get_argument(prefix + "book")); + filter.aliasNames(request.get_arguments(prefix + "book")); } catch (...) {} return filter; } From 956c597e80fba8dd8362f7eb68d2a031f955e065 Mon Sep 17 00:00:00 2001 From: Nikhil Tanwar <2002nikhiltanwar@gmail.com> Date: Wed, 31 Aug 2022 20:27:16 +0530 Subject: [PATCH 4/6] fixup! Extract parseQuery() --- src/library.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index e45d1d488..1bc360903 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -525,7 +525,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) return queryParser.parse_query(normalizeText(filter.getQuery()), flags); } -Xapian::Query parseQuery(const std::string& query, const std::string& prefix) +Xapian::Query makePhraseQuery(const std::string& query, const std::string& prefix) { Xapian::QueryParser queryParser; queryParser.set_default_op(Xapian::Query::OP_OR); @@ -550,7 +550,7 @@ Xapian::Query aliasNamesQuery(const Filter::AliasNames& aliasNames) Xapian::Query q = Xapian::Query(std::string()); std::vector<Xapian::Query> queryVec; for (const auto& aliasName : aliasNames) { - queryVec.push_back(parseQuery(aliasName, "XF")); + queryVec.push_back(makePhraseQuery(aliasName, "XF")); } Xapian::Query combinedQuery(Xapian::Query::OP_OR, queryVec.begin(), queryVec.end()); q = Xapian::Query(Xapian::Query::OP_FILTER, q, combinedQuery); 
@@ -564,12 +564,12 @@ Xapian::Query langQuery(const std::string& lang) Xapian::Query publisherQuery(const std::string& publisher) { - return parseQuery(publisher, "XP"); + return makePhraseQuery(publisher, "XP"); } Xapian::Query creatorQuery(const std::string& creator) { - return parseQuery(creator, "A"); + return makePhraseQuery(creator, "A"); } Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& rejectTags) From 8a3a0b08c246d3b8e4c95987ea7b1b677e92f5b1 Mon Sep 17 00:00:00 2001 From: Nikhil Tanwar <2002nikhiltanwar@gmail.com> Date: Thu, 1 Sep 2022 21:17:49 +0530 Subject: [PATCH 5/6] fixup! Add catalog filtering using ZIM aliasname --- include/name_mapper.h | 1 + src/library.cpp | 3 ++- src/name_mapper.cpp | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/name_mapper.h b/include/name_mapper.h index 4247c5c3c..8b8bde620 100644 --- a/include/name_mapper.h +++ b/include/name_mapper.h @@ -54,6 +54,7 @@ class HumanReadableNameMapper : public NameMapper { virtual ~HumanReadableNameMapper() = default; virtual std::string getNameForId(const std::string& id) const; virtual std::string getIdForName(const std::string& name) const; + static std::string removeDateFromBookId(const std::string& bookId); }; class UpdatableNameMapper : public NameMapper { diff --git a/src/library.cpp b/src/library.cpp index 1bc360903..7b0db0776 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -28,6 +28,7 @@ #include "tools/stringTools.h" #include "tools/otherTools.h" #include "tools/concurrent_cache.h" +#include "name_mapper.h" #include #include @@ -462,7 +463,7 @@ void Library::updateBookDB(const Book& book) indexer.index_text(normalizeText(book.getName()), 1, "XN"); indexer.index_text(normalizeText(book.getCategory()), 1, "XC"); const auto bookName = book.getHumanReadableIdFromPath(); - const auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$"); + const auto aliasName = 
HumanReadableNameMapper::removeDateFromBookId(bookName); indexer.index_text(normalizeText(aliasName), 1, "XF"); for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) { diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index dccf40c9b..6877e229b 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -34,7 +34,7 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w if (!withAlias) continue; - auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$"); + auto aliasName = removeDateFromBookId(bookName); if (aliasName == bookName) { continue; } @@ -51,6 +51,10 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w } } +std::string HumanReadableNameMapper::removeDateFromBookId(const std::string& bookId) { + return replaceRegex(bookId, "", "_[[:digit:]]{4}-[[:digit:]]{2}$"); +} + std::string HumanReadableNameMapper::getNameForId(const std::string& id) const { return m_idToName.at(id); } From 2cd057941eafd1315cb76d1518429e8bf93dc0de Mon Sep 17 00:00:00 2001 From: Nikhil Tanwar <2002nikhiltanwar@gmail.com> Date: Fri, 2 Sep 2022 01:24:19 +0530 Subject: [PATCH 6/6] Add test for aliasName filtering Adds test to check if filtering by alias name works. 
--- test/library.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/library.cpp b/test/library.cpp index ef41c4d7c..92db1a57d 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -500,6 +500,24 @@ TEST_F(LibraryTest, filterByTags) ); } +TEST_F(LibraryTest, filterByAliasNames) +{ + // filtering for one book + EXPECT_FILTER_RESULTS(kiwix::Filter().aliasNames({"zimfile"}), + "Ray Charles" + ); + + // filtering for more than one book + EXPECT_FILTER_RESULTS(kiwix::Filter().aliasNames({"zimfile", "example"}), + "An example ZIM archive", + "Ray Charles" + ); + + // filtering by alias name requires full text match + EXPECT_FILTER_RESULTS(kiwix::Filter().aliasNames({"wrong_name"}), + /* no results */ + ); +} TEST_F(LibraryTest, filterByQuery) {