From cd91e53600ce1de4666b9212096f91dae6c5c625 Mon Sep 17 00:00:00 2001 From: Vesa Meskanen Date: Fri, 10 Jun 2022 16:04:45 +0300 Subject: [PATCH] More general handling of wrong parsing --- sanitizer/_text_addressit.js | 37 +++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/sanitizer/_text_addressit.js b/sanitizer/_text_addressit.js index 8e43875b..88417314 100644 --- a/sanitizer/_text_addressit.js +++ b/sanitizer/_text_addressit.js @@ -37,6 +37,8 @@ var cleanRegions; var postalCodeValidator = function(code) { return true; }; // default = accept everything var streetNumberValidator = function(code) { return true; }; +const unusedParse = ['state', 'county', 'country', 'borough']; + if (api && api.localization) { filteredRegions = api.localization.filteredRegions; cleanRegions = api.localization.cleanRegions; @@ -108,16 +110,6 @@ function assignValidLibpostalParsing(parsedText, fromLibpostal, text) { } } - // parser often misinterprets partial text (la, ny, etc) as US state or country - // we should reprogram parsing database with finnish addresses only! - const mistake = fromLibpostal.state || fromLibpostal.country; - if (mistake) { - if (parsedText.name && parsedText.name.indexOf(mistake) === -1) { - parsedText.name = text; // parser is confused, search for full text - return; - } - } - const nbrh = fromLibpostal.neighbourhood; if(nbrh) { parsedText.neighbourhood = nbrh; @@ -150,6 +142,25 @@ function assignValidLibpostalParsing(parsedText, fromLibpostal, text) { if(check.assigned(fromLibpostal.postalcode) && postalCodeValidator(fromLibpostal.postalcode)) { parsedText.postalcode = fromLibpostal.postalcode; } + + // parser often misinterprets partial text (la, ny, etc) as US state, county or country + // so that some part of search text can get totally ignored + // we should reprogram parsing database with finnish addresses only! + // Libpostal does not document how to do that. + unusedParse.forEach(key => { + const mistake = fromLibpostal[key]; + // check if parser has erased some search components which would be ignored in search + if (mistake) { + if ((!parsedText.name || parsedText.name.indexOf(mistake) === -1) && + (!parsedText.street || parsedText.street.indexOf(mistake) === -1) && + (!parsedText.regions || parsedText.regions.indexOf(mistake) === -1) + ) { + // parser is confused, search for full text + parsedText.name = text; + delete parsedText.regions; + } + } + }); } @@ -192,9 +203,9 @@ function _sanitize( raw, clean ){ } if (parsedText.regions) { for (var i=0; i