Skip to content

Commit

Permalink
autocomplete-improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
StekPerepolnen committed May 3, 2024
1 parent 6dc1fa2 commit 33b35e4
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 65 deletions.
8 changes: 8 additions & 0 deletions ydb/core/viewer/json_autocomplete.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
Tables.emplace_back(table);
}
Prefix = request.GetPrefix();
Limit = request.GetLimit();

Timeout = ViewerRequest->Get()->Record.GetTimeout();
Direct = true;
Expand Down Expand Up @@ -112,6 +113,9 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
} else {
SearchWord = Prefix;
}
if (Limit == 0) {
Limit = std::numeric_limits<ui32>::max();
}
}

void ParseCgiParameters(const TCgiParameters& params) {
Expand All @@ -137,6 +141,9 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
}
}
Prefix = Prefix.empty() ? requestData["prefix"].GetStringSafe({}) : Prefix;
if (requestData["limit"].IsDefined()) {
Limit = requestData["limit"].GetInteger();
}
}
}

Expand Down Expand Up @@ -234,6 +241,7 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
autocompleteRequest->AddTables(path);
}
autocompleteRequest->SetPrefix(Prefix);
autocompleteRequest->SetLimit(Limit);

ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kAutocompleteRequest, nodeId);
SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64());
Expand Down
1 change: 1 addition & 0 deletions ydb/core/viewer/protos/viewer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ message TSchemeCacheRequest {
string Database = 1;
repeated string Tables = 2;
string Prefix = 3;
uint32 Limit = 4;
}

message TEvViewerRequest {
Expand Down
45 changes: 37 additions & 8 deletions ydb/core/viewer/query_autocomplete_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace NKikimr::NViewer {

inline ui32 LevenshteinDistance(TString word1, TString word2) {
word1 = to_lower(word1);
word2 = to_lower(word2);
ui32 size1 = word1.size();
ui32 size2 = word2.size();
ui32 dist[size1 + 1][size2 + 1]; // distance matrix
Expand Down Expand Up @@ -32,23 +34,50 @@ inline ui32 LevenshteinDistance(TString word1, TString word2) {
template<typename Type>
class FuzzySearcher {
struct WordHit {
ui32 Distance;
bool Contains;
ui32 LengthDifference;
ui32 LevenshteinDistance;
Type Data;

WordHit(ui32 dist, Type data)
: Distance(dist)
WordHit(bool contains, ui32 lengthDifference, ui32 levenshteinDistance, Type data)
: Contains(contains)
, LengthDifference(lengthDifference)
, LevenshteinDistance(levenshteinDistance)
, Data(data)
{}

bool operator<(const WordHit& other) const {
return Distance < other.Distance;
if (this->Contains && !other.Contains) {
return true;
}
if (this->Contains && other.Contains) {
return this->LengthDifference < other.LengthDifference;
}
return this->LevenshteinDistance < other.LevenshteinDistance;
}

bool operator>(const WordHit& other) const {
return Distance > other.Distance;
if (!this->Contains && other.Contains) {
return true;
}
if (this->Contains && other.Contains) {
return this->LengthDifference > other.LengthDifference;
}
return this->LevenshteinDistance > other.LevenshteinDistance;
}
};

// Scores one dictionary entry against the search word, ignoring case.
//
// searchWord - text typed by the user.
// testWord   - dictionary key being evaluated.
// testData   - payload stored alongside testWord; copied into the result.
//
// Returns a WordHit describing how well testWord matches:
//   * if the lower-cased testWord contains the lower-cased searchWord, the hit
//     is marked as containing and carries the length difference between the
//     two words (its Levenshtein distance is stored as 0);
//   * otherwise the hit carries the Levenshtein distance between the two
//     lower-cased words (its length difference is stored as 0).
static WordHit CalculateWordHit(TString searchWord, TString testWord, Type testData) {
    searchWord = to_lower(searchWord);
    testWord = to_lower(testWord);

    if (testWord.Contains(searchWord)) {
        // testWord contains searchWord, so it is at least as long and the
        // subtraction cannot underflow.
        const ui32 lengthDifference = static_cast<ui32>(testWord.length() - searchWord.length());
        return {true, lengthDifference, 0, testData};
    }

    // No substring match: fall back to edit distance.
    return {false, 0, LevenshteinDistance(searchWord, testWord), testData};
}

public:
THashMap<TString, Type> Dictionary;

Expand All @@ -63,15 +92,15 @@ class FuzzySearcher {

TVector<Type> Search(const TString& searchWord, ui32 limit = 10) {
auto cmp = [](const WordHit& left, const WordHit& right) {
return left.Distance < right.Distance;
return left < right;
};
std::priority_queue<WordHit, TVector<WordHit>, decltype(cmp)> queue(cmp);

for (const auto& [word, data]: Dictionary) {
auto wordHit = WordHit(LevenshteinDistance(searchWord, word), data);
auto wordHit = CalculateWordHit(searchWord, word, data);
if (queue.size() < limit) {
queue.emplace(wordHit);
} else if (wordHit.Distance < queue.top().Distance) {
} else if (queue.size() > 0 && wordHit < queue.top()) {
queue.pop();
queue.emplace(wordHit);
}
Expand Down
124 changes: 67 additions & 57 deletions ydb/core/viewer/viewer_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,7 @@ Y_UNIT_TEST_SUITE(Viewer) {
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("abc", ""), 3);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("apple", "apple"), 0);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("apple", "aple"), 1);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("UPPER", "upper"), 0);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("horse", "ros"), 3);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("intention", "execution"), 5);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("/slice/db", "/slice"), 3);
Expand All @@ -1048,64 +1049,48 @@ Y_UNIT_TEST_SUITE(Viewer) {
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("/slice/db", "/slice/db26000"), 5);
}

Y_UNIT_TEST(FuzzySearcher)
{
TVector<TString> dictionary = { "/slice", "/slice/db", "/slice/db26000" };
TVector<TString> SimilarWordsDictionary = { "/slice", "/slice/db", "/slice/db26000" };
TVector<TString> DifferentWordsDictionary = { "/orders", "/peoples", "/OrdinaryScheduleTables" };

{
TVector<TString> expectations = { "/slice/db" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 1);
void FuzzySearcherTest(TVector<TString>& dictionary, TString search, ui32 limit, TVector<TString> expectations) {
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search(search, limit);

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}

{
TVector<TString> expectations = { "/slice/db", "/slice" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 2);

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}

{
TVector<TString> expectations = { "/slice/db", "/slice", "/slice/db26000"};
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 3);
// With a limit of 1, searching for a word that is present in the dictionary
// is expected to return just that word.
Y_UNIT_TEST(FuzzySearcherLimit1OutOf4)
{
    const TVector<TString> expected = { "/slice/db" };
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 1, expected);
}

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}
// With a limit of 2, the exact match is expected first, followed by the
// longer entry that contains the search word.
Y_UNIT_TEST(FuzzySearcherLimit2OutOf4)
{
    const TVector<TString> expected = { "/slice/db", "/slice/db26000" };
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 2, expected);
}

{
TVector<TString> expectations = { "/slice/db", "/slice", "/slice/db26000" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 4);
// With a limit of 3, both entries containing the search word are expected
// ahead of "/slice", which does not contain it.
Y_UNIT_TEST(FuzzySearcherLimit3OutOf4)
{
    const TVector<TString> expected = { "/slice/db", "/slice/db26000", "/slice" };
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 3, expected);
}

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}
// A limit larger than the number of expected results must not add entries:
// limit is 4 here while three results are expected.
// NOTE(review): the test name says "OutOf4", but SimilarWordsDictionary is
// declared with three entries - confirm the intended naming.
Y_UNIT_TEST(FuzzySearcherLimit4OutOf4)
{
    const TVector<TString> expected = { "/slice/db", "/slice/db26000", "/slice" };
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 4, expected);
}

{
TVector<TString> expectations = { "/slice/db26000", "/slice/db", "/slice" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db26001");
// The search word is longer than every dictionary entry, so no entry can
// contain it; the expected order runs from the closest entry to the farthest.
Y_UNIT_TEST(FuzzySearcherLongWord)
{
    const TString search = "/slice/db26001";
    const TVector<TString> expected = { "/slice/db26000", "/slice/db", "/slice" };
    FuzzySearcherTest(SimilarWordsDictionary, search, 10, expected);
}

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}
// Entries that contain the search text (compared case-insensitively) are
// expected ahead of entries that do not, whether the text sits at the start
// ("/ord") or at the end ("Tables") of the entry.
Y_UNIT_TEST(FuzzySearcherPriority)
{
    const ui32 limit = 10;

    const TVector<TString> expectedForPrefix = { "/orders", "/OrdinaryScheduleTables", "/peoples" };
    FuzzySearcherTest(DifferentWordsDictionary, "/ord", limit, expectedForPrefix);

    const TVector<TString> expectedForSuffix = { "/OrdinaryScheduleTables", "/orders", "/peoples" };
    FuzzySearcherTest(DifferentWordsDictionary, "Tables", limit, expectedForSuffix);
}

void JsonAutocompleteTest(HTTP_METHOD method, NJson::TJsonValue& value, TString prefix = "", TString database = "", TVector<TString> tables = {}, ui32 limit = 10, bool lowerCaseContentType = false) {
Expand Down Expand Up @@ -1136,6 +1121,7 @@ Y_UNIT_TEST_SUITE(Viewer) {
if (prefix) {
httpReq.CgiParameters.emplace("prefix", prefix);
}
httpReq.CgiParameters.emplace("limit", ToString(limit));
} else if (method == HTTP_METHOD_POST) {
NJson::TJsonArray tableArray;
for (const TString& table : tables) {
Expand All @@ -1145,13 +1131,13 @@ Y_UNIT_TEST_SUITE(Viewer) {
NJson::TJsonValue root = NJson::TJsonMap{
{"database", database},
{"table", tableArray},
{"prefix", prefix}
{"prefix", prefix},
{"limit", limit}
};
httpReq.PostContent = NJson::WriteJson(root);
auto contType = lowerCaseContentType ? "content-type" : "Content-Type";
httpReq.HttpHeaders.AddHeader(contType, "application/json");
auto contentType = lowerCaseContentType ? "content-type" : "Content-Type";
httpReq.HttpHeaders.AddHeader(contentType, "application/json");
}
httpReq.CgiParameters.emplace("limit", ToString(limit));
httpReq.CgiParameters.emplace("direct", "1");
auto page = MakeHolder<TMonPage>("viewer", "title");
TMonService2HttpRequest monReq(nullptr, &httpReq, nullptr, page.Get(), "/json/autocomplete", nullptr);
Expand Down Expand Up @@ -1236,7 +1222,7 @@ Y_UNIT_TEST_SUITE(Viewer) {
});
}

Y_UNIT_TEST(JsonAutocompleteDatabase) {
Y_UNIT_TEST(JsonAutocompleteStartOfDatabaseName) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root");
VerifyJsonAutocompleteSuccess(value, {
Expand All @@ -1246,16 +1232,22 @@ Y_UNIT_TEST_SUITE(Viewer) {
"/Root/MyDatabase",
"/Root/TestDatabase"
});
}

Y_UNIT_TEST(JsonAutocompleteEndOfDatabaseName) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_GET, value, "Database");
VerifyJsonAutocompleteSuccess(value, {
"/Root/test",
"/Root/MyDatabase",
"/Root/slice",
"/Root/TestDatabase",
"/Root/test",
"/Root/slice",
"/Root/qwerty"
});
}

Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseName) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root/Database");
VerifyJsonAutocompleteSuccess(value, {
"/Root/MyDatabase",
Expand All @@ -1264,19 +1256,28 @@ Y_UNIT_TEST_SUITE(Viewer) {
"/Root/slice",
"/Root/qwerty"
});
}

// GET autocomplete request for "/Root/Database" with the result count capped
// at two entries.
Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNameWithLimit) {
    const ui32 limit = 2;
    NJson::TJsonValue value;
    JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root/Database", "", {}, limit);
    VerifyJsonAutocompleteSuccess(value, {
        "/Root/MyDatabase",
        "/Root/TestDatabase"
    });
}

// Same lookup as the limited GET case, issued as a POST request; the same two
// entries are expected.
Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNamePOST) {
    const ui32 limit = 2;
    NJson::TJsonValue value;
    JsonAutocompleteTest(HTTP_METHOD_POST, value, "/Root/Database", "", {}, limit);
    VerifyJsonAutocompleteSuccess(value, {
        "/Root/MyDatabase",
        "/Root/TestDatabase"
    });
}

Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNameLowerCase) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_POST, value, "/Root/Database", "", {}, 2, true);
VerifyJsonAutocompleteSuccess(value, {
"/Root/MyDatabase",
Expand All @@ -1293,7 +1294,10 @@ Y_UNIT_TEST_SUITE(Viewer) {
"orders",
"products"
});
}

Y_UNIT_TEST(JsonAutocompleteSchemePOST) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_POST, value, "clien", "/Root/Database");
VerifyJsonAutocompleteSuccess(value, {
"clients",
Expand All @@ -1302,22 +1306,28 @@ Y_UNIT_TEST_SUITE(Viewer) {
});
}

Y_UNIT_TEST(JsonAutocompleteColumns) {
// GET autocomplete request with an empty prefix for the "orders" table of
// "/Root/Database"; the listed column names are expected in this order.
Y_UNIT_TEST(JsonAutocompleteEmptyColumns) {
    const TString database = "/Root/Database";
    const TVector<TString> tables = {"orders"};
    NJson::TJsonValue value;
    JsonAutocompleteTest(HTTP_METHOD_GET, value, "", database, tables);
    VerifyJsonAutocompleteSuccess(value, {
        "id",
        "name",
        "description"
    });
}

// GET autocomplete request with the prefix "nam" across the "orders" and
// "products" tables; "name" is expected first.
Y_UNIT_TEST(JsonAutocompleteColumns) {
    const TString database = "/Root/Database";
    const TVector<TString> tables = {"orders", "products"};
    NJson::TJsonValue value;
    JsonAutocompleteTest(HTTP_METHOD_GET, value, "nam", database, tables);
    VerifyJsonAutocompleteSuccess(value, {
        "name",
        "id",
        "description",
    });
}

Y_UNIT_TEST(JsonAutocompleteColumnsPOST) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_POST, value, "nam", "/Root/Database", {"orders", "products"});
VerifyJsonAutocompleteSuccess(value, {
"name",
Expand Down

0 comments on commit 33b35e4

Please sign in to comment.