diff --git a/ydb/core/viewer/json_autocomplete.h b/ydb/core/viewer/json_autocomplete.h index 46198c9e12dd..480f6ccb5321 100644 --- a/ydb/core/viewer/json_autocomplete.h +++ b/ydb/core/viewer/json_autocomplete.h @@ -80,6 +80,7 @@ class TJsonAutocomplete : public TViewerPipeClient { Tables.emplace_back(table); } Prefix = request.GetPrefix(); + Limit = request.GetLimit(); Timeout = ViewerRequest->Get()->Record.GetTimeout(); Direct = true; @@ -112,6 +113,9 @@ class TJsonAutocomplete : public TViewerPipeClient { } else { SearchWord = Prefix; } + if (Limit == 0) { + Limit = std::numeric_limits::max(); + } } void ParseCgiParameters(const TCgiParameters& params) { @@ -137,6 +141,9 @@ class TJsonAutocomplete : public TViewerPipeClient { } } Prefix = Prefix.empty() ? requestData["prefix"].GetStringSafe({}) : Prefix; + if (requestData["limit"].IsDefined()) { + Limit = requestData["limit"].GetInteger(); + } } } @@ -234,6 +241,7 @@ class TJsonAutocomplete : public TViewerPipeClient { autocompleteRequest->AddTables(path); } autocompleteRequest->SetPrefix(Prefix); + autocompleteRequest->SetLimit(Limit); ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kAutocompleteRequest, nodeId); SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); diff --git a/ydb/core/viewer/protos/viewer.proto b/ydb/core/viewer/protos/viewer.proto index fa78df8ce007..e194ad7ac6e8 100644 --- a/ydb/core/viewer/protos/viewer.proto +++ b/ydb/core/viewer/protos/viewer.proto @@ -532,6 +532,7 @@ message TSchemeCacheRequest { string Database = 1; repeated string Tables = 2; string Prefix = 3; + uint32 Limit = 4; } message TEvViewerRequest { diff --git a/ydb/core/viewer/query_autocomplete_helper.h b/ydb/core/viewer/query_autocomplete_helper.h index edde4796bfe0..e523ceb029bb 100644 --- a/ydb/core/viewer/query_autocomplete_helper.h +++ b/ydb/core/viewer/query_autocomplete_helper.h @@ -5,6 +5,8 @@ namespace NKikimr::NViewer { inline ui32 LevenshteinDistance(TString word1, TString word2) { + word1 = to_lower(word1); + word2 = to_lower(word2); ui32 size1 = word1.size(); ui32 size2 = word2.size(); ui32 dist[size1 + 1][size2 + 1]; // distance matrix @@ -32,23 +34,50 @@ inline ui32 LevenshteinDistance(TString word1, TString word2) { template class FuzzySearcher { struct WordHit { - ui32 Distance; + bool Contains; + ui32 LengthDifference; + ui32 LevenshteinDistance; Type Data; - WordHit(ui32 dist, Type data) - : Distance(dist) + WordHit(bool contains, ui32 lengthDifference, ui32 levenshteinDistance, Type data) + : Contains(contains) + , LengthDifference(lengthDifference) + , LevenshteinDistance(levenshteinDistance) , Data(data) {} bool operator<(const WordHit& other) const { - return Distance < other.Distance; + if (this->Contains && !other.Contains) { + return true; + } + if (this->Contains && other.Contains) { + return this->LengthDifference < other.LengthDifference; + } + return this->LevenshteinDistance < other.LevenshteinDistance; } bool operator>(const WordHit& other) const { - return Distance > other.Distance; + if (!this->Contains && other.Contains) { + return true; + } + if (this->Contains && other.Contains) { + return this->LengthDifference > other.LengthDifference; + } + return this->LevenshteinDistance > other.LevenshteinDistance; } }; + static WordHit CalculateWordHit(TString searchWord, TString testWord, Type testData) { + searchWord = to_lower(searchWord); + testWord = to_lower(testWord); + if (testWord.Contains(searchWord)) { + return {1, static_cast(testWord.length() - searchWord.length()), 0, testData}; + } else { + ui32 levenshteinDistance = LevenshteinDistance(searchWord, testWord); + return {0, 0, levenshteinDistance, testData}; + } + } + public: THashMap Dictionary; @@ -63,15 +92,15 @@ class FuzzySearcher { TVector Search(const TString& searchWord, ui32 limit = 10) { auto cmp = [](const WordHit& left, const WordHit& right) { - return left.Distance < right.Distance; + return left < right; }; std::priority_queue, decltype(cmp)> queue(cmp); for (const auto& [word, data]: Dictionary) { - auto wordHit = WordHit(LevenshteinDistance(searchWord, word), data); + auto wordHit = CalculateWordHit(searchWord, word, data); if (queue.size() < limit) { queue.emplace(wordHit); - } else if (wordHit.Distance < queue.top().Distance) { + } else if (queue.size() > 0 && wordHit < queue.top()) { queue.pop(); queue.emplace(wordHit); } diff --git a/ydb/core/viewer/viewer_ut.cpp b/ydb/core/viewer/viewer_ut.cpp index 7003dbcf5c25..5925e8cc8692 100644 --- a/ydb/core/viewer/viewer_ut.cpp +++ b/ydb/core/viewer/viewer_ut.cpp @@ -1040,6 +1040,7 @@ Y_UNIT_TEST_SUITE(Viewer) { UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("abc", ""), 3); UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("apple", "apple"), 0); UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("apple", "aple"), 1); + UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("UPPER", "upper"), 0); UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("horse", "ros"), 3); UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("intention", "execution"), 5); UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("/slice/db", "/slice"), 3); @@ -1048,64 +1049,48 @@ Y_UNIT_TEST_SUITE(Viewer) { UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("/slice/db", "/slice/db26000"), 5); } - Y_UNIT_TEST(FuzzySearcher) - { - TVector dictionary = { "/slice", "/slice/db", "/slice/db26000" }; + TVector SimilarWordsDictionary = { "/slice", "/slice/db", "/slice/db26000" }; + TVector DifferentWordsDictionary = { "/orders", "/peoples", "/OrdinaryScheduleTables" }; - { - TVector expectations = { "/slice/db" }; - auto fuzzy = FuzzySearcher(dictionary); - auto result = fuzzy.Search("/slice/db", 1); + void FuzzySearcherTest(TVector& dictionary, TString search, ui32 limit, TVector expectations) { + auto fuzzy = FuzzySearcher(dictionary); + auto result = fuzzy.Search(search, limit); - UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size()); - for (ui32 i = 0; i < expectations.size(); i++) { - UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]); - } - } - - { - TVector expectations = { "/slice/db", "/slice" }; - auto fuzzy = FuzzySearcher(dictionary); - auto result = fuzzy.Search("/slice/db", 2); - - UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size()); - for (ui32 i = 0; i < expectations.size(); i++) { - UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]); - } + UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size()); + for (ui32 i = 0; i < expectations.size(); i++) { + UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]); } + } - { - TVector expectations = { "/slice/db", "/slice", "/slice/db26000"}; - auto fuzzy = FuzzySearcher(dictionary); - auto result = fuzzy.Search("/slice/db", 3); + Y_UNIT_TEST(FuzzySearcherLimit1OutOf4) + { + FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 1, { "/slice/db" }); + } - UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size()); - for (ui32 i = 0; i < expectations.size(); i++) { - UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]); - } - } + Y_UNIT_TEST(FuzzySearcherLimit2OutOf4) + { + FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 2, { "/slice/db", "/slice/db26000" }); + } - { - TVector expectations = { "/slice/db", "/slice", "/slice/db26000" }; - auto fuzzy = FuzzySearcher(dictionary); - auto result = fuzzy.Search("/slice/db", 4); + Y_UNIT_TEST(FuzzySearcherLimit3OutOf4) + { + FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 3, { "/slice/db", "/slice/db26000", "/slice"}); + } - UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size()); - for (ui32 i = 0; i < expectations.size(); i++) { - UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]); - } - } + Y_UNIT_TEST(FuzzySearcherLimit4OutOf4) + { + FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 4, { "/slice/db", "/slice/db26000", "/slice"}); + } - { - TVector expectations = { "/slice/db26000", "/slice/db", "/slice" }; - auto fuzzy = FuzzySearcher(dictionary); - auto result = fuzzy.Search("/slice/db26001"); + Y_UNIT_TEST(FuzzySearcherLongWord) + { + FuzzySearcherTest(SimilarWordsDictionary, "/slice/db26001", 10, { "/slice/db26000", "/slice/db", "/slice"}); + } - UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size()); - for (ui32 i = 0; i < expectations.size(); i++) { - UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]); - } - } + Y_UNIT_TEST(FuzzySearcherPriority) + { + FuzzySearcherTest(DifferentWordsDictionary, "/ord", 10, { "/orders", "/OrdinaryScheduleTables", "/peoples"}); + FuzzySearcherTest(DifferentWordsDictionary, "Tables", 10, { "/OrdinaryScheduleTables", "/orders", "/peoples"}); } void JsonAutocompleteTest(HTTP_METHOD method, NJson::TJsonValue& value, TString prefix = "", TString database = "", TVector tables = {}, ui32 limit = 10, bool lowerCaseContentType = false) { @@ -1136,6 +1121,7 @@ Y_UNIT_TEST_SUITE(Viewer) { if (prefix) { httpReq.CgiParameters.emplace("prefix", prefix); } + httpReq.CgiParameters.emplace("limit", ToString(limit)); } else if (method == HTTP_METHOD_POST) { NJson::TJsonArray tableArray; for (const TString& table : tables) { @@ -1145,13 +1131,13 @@ Y_UNIT_TEST_SUITE(Viewer) { NJson::TJsonValue root = NJson::TJsonMap{ {"database", database}, {"table", tableArray}, - {"prefix", prefix} + {"prefix", prefix}, + {"limit", limit} }; httpReq.PostContent = NJson::WriteJson(root); - auto contType = lowerCaseContentType ? "content-type" : "Content-Type"; - httpReq.HttpHeaders.AddHeader(contType, "application/json"); + auto contentType = lowerCaseContentType ? "content-type" : "Content-Type"; + httpReq.HttpHeaders.AddHeader(contentType, "application/json"); } - httpReq.CgiParameters.emplace("limit", ToString(limit)); httpReq.CgiParameters.emplace("direct", "1"); auto page = MakeHolder("viewer", "title"); TMonService2HttpRequest monReq(nullptr, &httpReq, nullptr, page.Get(), "/json/autocomplete", nullptr); @@ -1236,7 +1222,7 @@ Y_UNIT_TEST_SUITE(Viewer) { }); } - Y_UNIT_TEST(JsonAutocompleteDatabase) { + Y_UNIT_TEST(JsonAutocompleteStartOfDatabaseName) { NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root"); VerifyJsonAutocompleteSuccess(value, { @@ -1246,16 +1232,22 @@ Y_UNIT_TEST_SUITE(Viewer) { "/Root/MyDatabase", "/Root/TestDatabase" }); + } + Y_UNIT_TEST(JsonAutocompleteEndOfDatabaseName) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_GET, value, "Database"); VerifyJsonAutocompleteSuccess(value, { - "/Root/test", "/Root/MyDatabase", - "/Root/slice", "/Root/TestDatabase", + "/Root/test", + "/Root/slice", "/Root/qwerty" }); + } + Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseName) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root/Database"); VerifyJsonAutocompleteSuccess(value, { "/Root/MyDatabase", @@ -1264,19 +1256,28 @@ Y_UNIT_TEST_SUITE(Viewer) { "/Root/slice", "/Root/qwerty" }); + } + Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNameWithLimit) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root/Database", "", {}, 2); VerifyJsonAutocompleteSuccess(value, { "/Root/MyDatabase", "/Root/TestDatabase" }); + } + Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNamePOST) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_POST, value, "/Root/Database", "", {}, 2); VerifyJsonAutocompleteSuccess(value, { "/Root/MyDatabase", "/Root/TestDatabase" }); + } + Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNameLowerCase) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_POST, value, "/Root/Database", "", {}, 2, true); VerifyJsonAutocompleteSuccess(value, { "/Root/MyDatabase", @@ -1293,7 +1294,10 @@ Y_UNIT_TEST_SUITE(Viewer) { "orders", "products" }); + } + Y_UNIT_TEST(JsonAutocompleteSchemePOST) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_POST, value, "clien", "/Root/Database"); VerifyJsonAutocompleteSuccess(value, { "clients", @@ -1302,7 +1306,7 @@ Y_UNIT_TEST_SUITE(Viewer) { }); } - Y_UNIT_TEST(JsonAutocompleteColumns) { + Y_UNIT_TEST(JsonAutocompleteEmptyColumns) { NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_GET, value, "", "/Root/Database", {"orders"}); VerifyJsonAutocompleteSuccess(value, { @@ -1310,14 +1314,20 @@ Y_UNIT_TEST_SUITE(Viewer) { "name", "description" }); + } + Y_UNIT_TEST(JsonAutocompleteColumns) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_GET, value, "nam", "/Root/Database", {"orders", "products"}); VerifyJsonAutocompleteSuccess(value, { "name", "id", "description", }); + } + Y_UNIT_TEST(JsonAutocompleteColumnsPOST) { + NJson::TJsonValue value; JsonAutocompleteTest(HTTP_METHOD_POST, value, "nam", "/Root/Database", {"orders", "products"}); VerifyJsonAutocompleteSuccess(value, { "name",