Skip to content

Commit

Permalink
[geocoder] Improve result relevance: ignore numerical suburb/sublocality without locality matching
Browse files Browse the repository at this point in the history
  • Loading branch information
Anatoly Serdtcev authored and mpimenov committed Aug 28, 2019
1 parent eaa2c4d commit df0291c
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 5 deletions.
18 changes: 18 additions & 0 deletions base/base_tests/string_utils_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,24 @@ UNIT_TEST(IsUtf8Test)
TEST(strings::IsASCIIString("Nice places in Zhodino.kml"), ());
}

// Checks strings::IsASCIINumeric on strings it must accept and strings it must reject.
UNIT_TEST(IsASCIINumericTest)
{
// Accepted: non-empty strings made only of the characters '0'..'9'.
// Leading zeros and all-zero strings are accepted as well.
TEST(strings::IsASCIINumeric("0"), ());
TEST(strings::IsASCIINumeric("1"), ());
TEST(strings::IsASCIINumeric("10"), ());
TEST(strings::IsASCIINumeric("01"), ());
TEST(strings::IsASCIINumeric("00"), ());

// Rejected: the empty string and anything containing a non-digit character
// (surrounding spaces, a sign, a hex prefix, a decimal point).
TEST(!strings::IsASCIINumeric(""), ());
TEST(!strings::IsASCIINumeric(" "), ());
TEST(!strings::IsASCIINumeric(" 9"), ());
TEST(!strings::IsASCIINumeric("9 "), ());
TEST(!strings::IsASCIINumeric("+3"), ());
TEST(!strings::IsASCIINumeric("-2"), ());
TEST(!strings::IsASCIINumeric("0x09"), ());
TEST(!strings::IsASCIINumeric("0.1"), ());
}

UNIT_TEST(CountNormLowerSymbols)
{
char const * strs[] = {"æüßs",
Expand Down
7 changes: 7 additions & 0 deletions base/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,13 @@ bool IsASCIISpace(UniChar c)
return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
}

// Returns true iff |str| is non-empty and every one of its characters
// satisfies strings::IsASCIIDigit. The empty string is not numeric.
bool IsASCIINumeric(std::string const & str)
{
  if (str.empty())
    return false;

  // Bail out on the first character that is not a digit.
  for (auto const ch : str)
  {
    if (!strings::IsASCIIDigit(ch))
      return false;
  }
  return true;
}

// Returns true iff |c| lies in one of the ranges 'a'..'z' or 'A'..'Z'.
bool IsASCIILatin(UniChar c)
{
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}

bool StartsWith(UniString const & s, UniString const & p)
Expand Down
1 change: 1 addition & 0 deletions base/string_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ UniString MakeUniString(std::string const & utf8s);
std::string ToUtf8(UniString const & s);
bool IsASCIIString(std::string const & str);
bool IsASCIIDigit(UniChar c);
bool IsASCIINumeric(std::string const & str);
bool IsASCIISpace(UniChar c);
bool IsASCIILatin(UniChar c);

Expand Down
33 changes: 33 additions & 0 deletions geocoder/geocoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,12 @@ void Geocoder::FillRegularLayer(Context const & ctx, Type type, Tokens const & s
return;

if (ctx.GetLayers().empty() || HasParent(ctx.GetLayers(), d))
{
if (type > Type::Locality && !IsRelevantLocalityMember(ctx, d, subquery))
return;

curLayer.m_entries.emplace_back(docId);
}
});
}

Expand All @@ -415,4 +420,32 @@ bool Geocoder::HasParent(vector<Geocoder::Layer> const & layers, Hierarchy::Entr
}
return false;
}

// Decides whether |member| should be kept as a match for |subquery|.
// Any subquery that is not a single all-digit token is always relevant.
// A single all-digit token is relevant only when HasMemberLocalityInMatching()
// holds for |member| in |ctx|.
bool Geocoder::IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member,
                                        Tokens const & subquery) const
{
  if (subquery.size() != 1)
    return true;

  if (!strings::IsASCIINumeric(subquery.front()))
    return true;

  // A lone number: require support from the already matched layers.
  return HasMemberLocalityInMatching(ctx, member);
}

// Returns true iff some entry of a Type::Locality layer in |ctx| is a parent of
// |member| according to m_hierarchy.IsParentTo(). The scan stops at the first
// layer whose type is greater than Type::Locality.
// NOTE(review): the early stop presumably relies on the layers being ordered by
// type - confirm against Context.
bool Geocoder::HasMemberLocalityInMatching(Context const & ctx,
                                           Hierarchy::Entry const & member) const
{
  for (auto const & layer : ctx.GetLayers())
  {
    if (layer.m_type > Type::Locality)
      break;

    if (layer.m_type == Type::Locality)
    {
      for (auto const docId : layer.m_entries)
      {
        if (m_hierarchy.IsParentTo(m_index.GetDoc(docId), member))
          return true;
      }
    }
  }

  return false;
}
} // namespace geocoder
3 changes: 3 additions & 0 deletions geocoder/geocoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ class Geocoder
// Returns whether any of the paths through |layers| can be extended
// by appending |e|.
bool HasParent(std::vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const;
bool IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member,
Tokens const & subquery) const;
bool HasMemberLocalityInMatching(Context const & ctx, Hierarchy::Entry const & member) const;

Hierarchy m_hierarchy;

Expand Down
26 changes: 21 additions & 5 deletions geocoder/geocoder_tests/geocoder_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,18 +272,13 @@ UNIT_TEST(Geocoder_LocalityBuilding)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Zelenograd"}}}}}
22 {"properties": {"locales": {"default": {"address": {"building": "2", "locality": "Zelenograd"}}}}}
31 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Zelenograd"}}}}}
32 {"properties": {"locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Zelenograd"}}}}}
)#";

ScopedFile const regionsJsonFile("regions.jsonl", kData);
Geocoder geocoder(regionsJsonFile.GetFullPath());

base::GeoObjectId const building2(0x22);

TestGeocoder(geocoder, "Zelenograd 2", {{building2, 1.0}});
}

Expand All @@ -305,6 +300,27 @@ UNIT_TEST(Geocoder_SubregionInLocality)
TestGeocoder(geocoder, "Москва", {{Id{0x10}, 1.0}, {Id{0x11}, 0.6}});
}

// Geocoder_NumericalSuburb* ----------------------------------------------------------------------
// Checks how a purely numeric query token ("60") is matched against a numeric
// suburb name versus a numeric building number.
UNIT_TEST(Geocoder_NumericalSuburbRelevance)
{
// Fixture: a region (10) with a locality (11) that has a suburb named "60" (12),
// plus an unrelated locality (20) with a street (21) and a building "60" (22).
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"region": "Metro Manila"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Caloocan", "region": "Metro Manila"}}}}}
12 {"properties": {"locales": {"default": {"address": {"suburb": "60", "locality": "Caloocan", "region": "Metro Manila"}}}}}
20 {"properties": {"locales": {"default": {"address": {"locality": "Белгород"}}}}}
21 {"properties": {"locales": {"default": {"address": {"street": "Щорса", "locality": "Белгород"}}}}}
22 {"properties": {"locales": {"default": {"address": {"building": "60", "street": "Щорса", "locality": "Белгород"}}}}}
)#";

ScopedFile const regionsJsonFile("regions.jsonl", kData);
Geocoder geocoder(regionsJsonFile.GetFullPath());

// With the locality in the query, the numeric suburb is the expected result.
TestGeocoder(geocoder, "Caloocan, 60", {{Id{0x12}, 1.0}});
// A bare number is expected to produce no results.
TestGeocoder(geocoder, "60", {});
// With only the region in the query, the region itself is expected, not the suburb.
TestGeocoder(geocoder, "Metro Manila, 60", {{Id{0x10}, 1.0}});
// A numeric building after locality and street is still expected to be found.
TestGeocoder(geocoder, "Белгород, Щорса, 60", {{Id{0x22}, 1.0}});
}

//--------------------------------------------------------------------------------------------------
UNIT_TEST(Geocoder_EmptyFileConcurrentRead)
{
Expand Down

0 comments on commit df0291c

Please sign in to comment.