Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

autocomplete check word containing #4270

Merged
merged 1 commit into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ydb/core/viewer/json_autocomplete.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
Tables.emplace_back(table);
}
Prefix = request.GetPrefix();
Limit = request.GetLimit();

Timeout = ViewerRequest->Get()->Record.GetTimeout();
Direct = true;
Expand Down Expand Up @@ -112,6 +113,9 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
} else {
SearchWord = Prefix;
}
if (Limit == 0) {
Limit = std::numeric_limits<ui32>::max();
}
}

void ParseCgiParameters(const TCgiParameters& params) {
Expand All @@ -137,6 +141,9 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
}
}
Prefix = Prefix.empty() ? requestData["prefix"].GetStringSafe({}) : Prefix;
if (requestData["limit"].IsDefined()) {
Limit = requestData["limit"].GetInteger();
}
}
}

Expand Down Expand Up @@ -234,6 +241,7 @@ class TJsonAutocomplete : public TViewerPipeClient<TJsonAutocomplete> {
autocompleteRequest->AddTables(path);
}
autocompleteRequest->SetPrefix(Prefix);
autocompleteRequest->SetLimit(Limit);

ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kAutocompleteRequest, nodeId);
SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64());
Expand Down
1 change: 1 addition & 0 deletions ydb/core/viewer/protos/viewer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ message TSchemeCacheRequest {
string Database = 1;
repeated string Tables = 2;
string Prefix = 3;
uint32 Limit = 4;
}

message TEvViewerRequest {
Expand Down
45 changes: 37 additions & 8 deletions ydb/core/viewer/query_autocomplete_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace NKikimr::NViewer {

inline ui32 LevenshteinDistance(TString word1, TString word2) {
word1 = to_lower(word1);
word2 = to_lower(word2);
ui32 size1 = word1.size();
ui32 size2 = word2.size();
ui32 dist[size1 + 1][size2 + 1]; // distance matrix
Expand Down Expand Up @@ -32,23 +34,50 @@ inline ui32 LevenshteinDistance(TString word1, TString word2) {
template<typename Type>
class FuzzySearcher {
struct WordHit {
ui32 Distance;
bool Contains;
ui32 LengthDifference;
ui32 LevenshteinDistance;
Type Data;

WordHit(ui32 dist, Type data)
: Distance(dist)
WordHit(bool contains, ui32 lengthDifference, ui32 levenshteinDistance, Type data)
: Contains(contains)
, LengthDifference(lengthDifference)
, LevenshteinDistance(levenshteinDistance)
, Data(data)
{}

bool operator<(const WordHit& other) const {
return Distance < other.Distance;
if (this->Contains && !other.Contains) {
return true;
}
if (this->Contains && other.Contains) {
return this->LengthDifference < other.LengthDifference;
}
return this->LevenshteinDistance < other.LevenshteinDistance;
}

bool operator>(const WordHit& other) const {
return Distance > other.Distance;
if (!this->Contains && other.Contains) {
return true;
}
if (this->Contains && other.Contains) {
return this->LengthDifference > other.LengthDifference;
}
return this->LevenshteinDistance > other.LevenshteinDistance;
}
};

// Scores testWord against searchWord; both are lowercased first, so the
// comparison is case-insensitive.
//  - If the lowercased testWord contains the lowercased searchWord, the hit is
//    flagged as "contains" and carries the length difference between the two
//    words; its Levenshtein distance is left at 0.
//  - Otherwise the hit carries only the Levenshtein distance between the two
//    lowercased words; its length difference is left at 0.
// testData is stored in the returned hit unchanged.
static WordHit CalculateWordHit(TString searchWord, TString testWord, Type testData) {
    const TString needle = to_lower(searchWord);
    const TString candidate = to_lower(testWord);

    if (!candidate.Contains(needle)) {
        // No substring match: fall back to edit distance.
        return WordHit(false, 0, LevenshteinDistance(needle, candidate), testData);
    }

    // Substring match: candidate is at least as long as needle, so the subtraction cannot wrap.
    const ui32 lengthDifference = static_cast<ui32>(candidate.length() - needle.length());
    return WordHit(true, lengthDifference, 0, testData);
}

public:
THashMap<TString, Type> Dictionary;

Expand All @@ -63,15 +92,15 @@ class FuzzySearcher {

TVector<Type> Search(const TString& searchWord, ui32 limit = 10) {
auto cmp = [](const WordHit& left, const WordHit& right) {
return left.Distance < right.Distance;
return left < right;
};
std::priority_queue<WordHit, TVector<WordHit>, decltype(cmp)> queue(cmp);

for (const auto& [word, data]: Dictionary) {
auto wordHit = WordHit(LevenshteinDistance(searchWord, word), data);
auto wordHit = CalculateWordHit(searchWord, word, data);
if (queue.size() < limit) {
queue.emplace(wordHit);
} else if (wordHit.Distance < queue.top().Distance) {
} else if (queue.size() > 0 && wordHit < queue.top()) {
queue.pop();
queue.emplace(wordHit);
}
Expand Down
124 changes: 67 additions & 57 deletions ydb/core/viewer/viewer_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,7 @@ Y_UNIT_TEST_SUITE(Viewer) {
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("abc", ""), 3);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("apple", "apple"), 0);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("apple", "aple"), 1);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("UPPER", "upper"), 0);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("horse", "ros"), 3);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("intention", "execution"), 5);
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("/slice/db", "/slice"), 3);
Expand All @@ -1048,64 +1049,48 @@ Y_UNIT_TEST_SUITE(Viewer) {
UNIT_ASSERT_VALUES_EQUAL(LevenshteinDistance("/slice/db", "/slice/db26000"), 5);
}

Y_UNIT_TEST(FuzzySearcher)
{
TVector<TString> dictionary = { "/slice", "/slice/db", "/slice/db26000" };
TVector<TString> SimilarWordsDictionary = { "/slice", "/slice/db", "/slice/db26000" };
TVector<TString> DifferentWordsDictionary = { "/orders", "/peoples", "/OrdinaryScheduleTables" };

{
TVector<TString> expectations = { "/slice/db" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 1);
void FuzzySearcherTest(TVector<TString>& dictionary, TString search, ui32 limit, TVector<TString> expectations) {
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search(search, limit);

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}

{
TVector<TString> expectations = { "/slice/db", "/slice" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 2);

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}

{
TVector<TString> expectations = { "/slice/db", "/slice", "/slice/db26000"};
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 3);
// With limit 1 only the single best hit is expected: the dictionary entry
// identical to the search word.
Y_UNIT_TEST(FuzzySearcherLimit1OutOf4)
{
    const TVector<TString> expected = { "/slice/db" };
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 1, expected);
}

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}
// With limit 2 the two entries containing the search word are expected,
// the one closest in length to the search word first.
Y_UNIT_TEST(FuzzySearcherLimit2OutOf4)
{
    const TVector<TString> expected = { "/slice/db", "/slice/db26000" };
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 2, expected);
}

{
TVector<TString> expectations = { "/slice/db", "/slice", "/slice/db26000" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db", 4);
// With limit 3 the whole dictionary is expected: entries containing the
// search word first, then the entry that does not contain it.
Y_UNIT_TEST(FuzzySearcherLimit3OutOf4)
{
    const TVector<TString> expected = { "/slice/db", "/slice/db26000", "/slice"};
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 3, expected);
}

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}
// The limit (4) exceeds the dictionary size (3 entries), so all three
// entries are expected, in the same order as for limit 3.
Y_UNIT_TEST(FuzzySearcherLimit4OutOf4)
{
    const TVector<TString> expected = { "/slice/db", "/slice/db26000", "/slice"};
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db", 4, expected);
}

{
TVector<TString> expectations = { "/slice/db26000", "/slice/db", "/slice" };
auto fuzzy = FuzzySearcher<TString>(dictionary);
auto result = fuzzy.Search("/slice/db26001");
// The search word is longer than every dictionary entry, so no entry can
// contain it; the expected order runs from the most similar entry to the least.
Y_UNIT_TEST(FuzzySearcherLongWord)
{
    const TVector<TString> expected = { "/slice/db26000", "/slice/db", "/slice"};
    FuzzySearcherTest(SimilarWordsDictionary, "/slice/db26001", 10, expected);
}

UNIT_ASSERT_VALUES_EQUAL(expectations.size(), result.size());
for (ui32 i = 0; i < expectations.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(expectations[i], result[i]);
}
}
// Entries that contain the search word (compared case-insensitively) are
// expected ahead of entries that do not.
Y_UNIT_TEST(FuzzySearcherPriority)
{
    // "/ord" occurs in two entries; the one closer in length to the search word is expected first.
    const TVector<TString> expectedForPrefix = { "/orders", "/OrdinaryScheduleTables", "/peoples"};
    FuzzySearcherTest(DifferentWordsDictionary, "/ord", 10, expectedForPrefix);

    // "Tables" occurs only at the end of one entry; that entry is expected first.
    const TVector<TString> expectedForSuffix = { "/OrdinaryScheduleTables", "/orders", "/peoples"};
    FuzzySearcherTest(DifferentWordsDictionary, "Tables", 10, expectedForSuffix);
}

void JsonAutocompleteTest(HTTP_METHOD method, NJson::TJsonValue& value, TString prefix = "", TString database = "", TVector<TString> tables = {}, ui32 limit = 10, bool lowerCaseContentType = false) {
Expand Down Expand Up @@ -1136,6 +1121,7 @@ Y_UNIT_TEST_SUITE(Viewer) {
if (prefix) {
httpReq.CgiParameters.emplace("prefix", prefix);
}
httpReq.CgiParameters.emplace("limit", ToString(limit));
} else if (method == HTTP_METHOD_POST) {
NJson::TJsonArray tableArray;
for (const TString& table : tables) {
Expand All @@ -1145,13 +1131,13 @@ Y_UNIT_TEST_SUITE(Viewer) {
NJson::TJsonValue root = NJson::TJsonMap{
{"database", database},
{"table", tableArray},
{"prefix", prefix}
{"prefix", prefix},
{"limit", limit}
};
httpReq.PostContent = NJson::WriteJson(root);
auto contType = lowerCaseContentType ? "content-type" : "Content-Type";
httpReq.HttpHeaders.AddHeader(contType, "application/json");
auto contentType = lowerCaseContentType ? "content-type" : "Content-Type";
httpReq.HttpHeaders.AddHeader(contentType, "application/json");
}
httpReq.CgiParameters.emplace("limit", ToString(limit));
httpReq.CgiParameters.emplace("direct", "1");
auto page = MakeHolder<TMonPage>("viewer", "title");
TMonService2HttpRequest monReq(nullptr, &httpReq, nullptr, page.Get(), "/json/autocomplete", nullptr);
Expand Down Expand Up @@ -1236,7 +1222,7 @@ Y_UNIT_TEST_SUITE(Viewer) {
});
}

Y_UNIT_TEST(JsonAutocompleteDatabase) {
Y_UNIT_TEST(JsonAutocompleteStartOfDatabaseName) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root");
VerifyJsonAutocompleteSuccess(value, {
Expand All @@ -1246,16 +1232,22 @@ Y_UNIT_TEST_SUITE(Viewer) {
"/Root/MyDatabase",
"/Root/TestDatabase"
});
}

Y_UNIT_TEST(JsonAutocompleteEndOfDatabaseName) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_GET, value, "Database");
VerifyJsonAutocompleteSuccess(value, {
"/Root/test",
"/Root/MyDatabase",
"/Root/slice",
"/Root/TestDatabase",
"/Root/test",
"/Root/slice",
"/Root/qwerty"
});
}

Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseName) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root/Database");
VerifyJsonAutocompleteSuccess(value, {
"/Root/MyDatabase",
Expand All @@ -1264,19 +1256,28 @@ Y_UNIT_TEST_SUITE(Viewer) {
"/Root/slice",
"/Root/qwerty"
});
}

// GET autocomplete request for prefix "/Root/Database" with limit 2;
// the response is verified against the two listed database names.
Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNameWithLimit) {
NJson::TJsonValue value;
// Arguments after the prefix: empty database, no tables, limit = 2.
JsonAutocompleteTest(HTTP_METHOD_GET, value, "/Root/Database", "", {}, 2);
VerifyJsonAutocompleteSuccess(value, {
"/Root/MyDatabase",
"/Root/TestDatabase"
});
}

// Same request as the GET variant with limit 2, but sent via POST;
// the response is verified against the same two database names.
Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNamePOST) {
NJson::TJsonValue value;
// Arguments after the prefix: empty database, no tables, limit = 2.
JsonAutocompleteTest(HTTP_METHOD_POST, value, "/Root/Database", "", {}, 2);
VerifyJsonAutocompleteSuccess(value, {
"/Root/MyDatabase",
"/Root/TestDatabase"
});
}

Y_UNIT_TEST(JsonAutocompleteSimilarDatabaseNameLowerCase) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_POST, value, "/Root/Database", "", {}, 2, true);
VerifyJsonAutocompleteSuccess(value, {
"/Root/MyDatabase",
Expand All @@ -1293,7 +1294,10 @@ Y_UNIT_TEST_SUITE(Viewer) {
"orders",
"products"
});
}

Y_UNIT_TEST(JsonAutocompleteSchemePOST) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_POST, value, "clien", "/Root/Database");
VerifyJsonAutocompleteSuccess(value, {
"clients",
Expand All @@ -1302,22 +1306,28 @@ Y_UNIT_TEST_SUITE(Viewer) {
});
}

Y_UNIT_TEST(JsonAutocompleteColumns) {
// GET autocomplete request with an empty prefix for table "orders" in
// database "/Root/Database"; the response is verified against the listed column names.
Y_UNIT_TEST(JsonAutocompleteEmptyColumns) {
NJson::TJsonValue value;
// Limit is not passed, so JsonAutocompleteTest's default applies.
JsonAutocompleteTest(HTTP_METHOD_GET, value, "", "/Root/Database", {"orders"});
VerifyJsonAutocompleteSuccess(value, {
"id",
"name",
"description"
});
}

// GET autocomplete request with prefix "nam" for tables "orders" and "products"
// in database "/Root/Database"; the response is verified against the listed
// column names, with "name" listed first.
Y_UNIT_TEST(JsonAutocompleteColumns) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_GET, value, "nam", "/Root/Database", {"orders", "products"});
VerifyJsonAutocompleteSuccess(value, {
"name",
"id",
"description",
});
}

Y_UNIT_TEST(JsonAutocompleteColumnsPOST) {
NJson::TJsonValue value;
JsonAutocompleteTest(HTTP_METHOD_POST, value, "nam", "/Root/Database", {"orders", "products"});
VerifyJsonAutocompleteSuccess(value, {
"name",
Expand Down
Loading