Skip to content

Commit

Permalink
Lower the score of variated names slightly
Browse files Browse the repository at this point in the history
  • Loading branch information
vesameskanen committed Jun 9, 2022
1 parent 0b7225c commit 8a0ae50
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions middleware/confidenceScoreDT.js
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,8 @@ function checkLanguageNames(text, doc, stripNumbers, tryGenitive) {
var parent = doc.parent || {};
var textWC = text.split(' ').length;

var checkNewBest = function(_text, name) {
var score = fuzzy.match(_text, name);
var checkNewBest = function(_text, name, coeff) {
var score = fuzzy.match(_text, name) * score;
logger.debug('#', _text, '|', name, score);
if (score >= bestScore ) {
bestScore = score;
Expand All @@ -283,12 +283,14 @@ function checkLanguageNames(text, doc, stripNumbers, tryGenitive) {
return score;
};

// A strict length limit is not needed when matching against a user-typed search
// string, because that string can be unfinished: porin raut vs rautatieasema, pori
//
// Scores _text against name prefixed with an admin name ('<admin> <name>').
//   _text       - string handed to checkNewBest as the text to match
//   admin       - admin name; passed through normalize() before use
//   name        - base name that gets the admin prefix
//   limitLength - when truthy, the prefixed name is scored only if it is no
//                 longer than _text
// Nothing is scored when the normalized admin is falsy or already occurs in name.
var checkAdminName = function(_text, admin, name, limitLength) {
admin = normalize(admin);
if(admin && name.indexOf(admin) === -1) {
const extendedName = admin + ' ' + name;
if(!limitLength || extendedName.length <= _text.length) {
// NOTE(review): the next two calls look like the removed and added side of one
// diff line (without / with the 0.99 coefficient) - confirm only one is intended.
checkNewBest(_text, admin + ' ' + name);
// The admin-prefixed variant is passed a 0.99 coefficient.
checkNewBest(_text, admin + ' ' + name, 0.99);
}
}
};
Expand All @@ -309,19 +311,19 @@ function checkLanguageNames(text, doc, stripNumbers, tryGenitive) {

if (score > genitiveThreshold && tryGenitive) { // don't prefix unless base match is OK
var nameWC = name.split(' ').length;
var score = checkNewBest(text, name);
var score = checkNewBest(text, name, 1.0);
// prefix with parent admins to catch cases like 'kontulan r-kioski = r-kioski, kontula'
for(var key in adminWeights) {
var admins = parent[key];
var check = Array.isArray(admins) ? checkAdminNames : checkAdminName;
var adminCheck = Array.isArray(admins) ? checkAdminNames : checkAdminName;
if(textLen > 2 + nameLen && textWC > nameWC) { // Shortest admin prefix is 'ii '
check(text, admins, name, false);
adminCheck(text, admins, name, false);
if (doc.street) { // try also street: 'helsinginkadun r-kioski'
checkAdminName(text, doc.street, name, false);
}
}
if (nameLen > 2 + textLen && nameWC > textWC) {
check(name, admins, text, true);
adminCheck(name, admins, text, true);
if (doc.street) {
checkAdminName(name, doc.street, text, true);
}
Expand Down

0 comments on commit 8a0ae50

Please sign in to comment.