From b50191df8d5868619131cf9e0e474bd04c1c3287 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 11 Jul 2024 12:17:55 +0200 Subject: [PATCH] fix(dedupe): improved deduplication between USA ZIP vs ZIP+4 properties --- helper/diffPlaces.js | 31 ++++++++++++++++++++++++++----- test/unit/helper/diffPlaces.js | 26 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js index 6909034f2..754fc0c21 100644 --- a/helper/diffPlaces.js +++ b/helper/diffPlaces.js @@ -41,9 +41,13 @@ function isLayerDifferent(item1, item2){ return false; } +function isUSA(item) { + if (!_.isArray(item?.parent?.country_a)) { return false; } + return item.parent.country_a[0] === 'USA'; +} + function isUsState(item) { - if (!_.isArray(item.parent.country_a)) { return false; } - return item.parent.country_a[0] === 'USA' && item.layer === 'region'; + return isUSA(item) && item.layer === 'region'; } // Geonames records in the locality and localadmin layer are parented by themselves @@ -206,7 +210,7 @@ function isAddressDifferent(item1, item2){ // only compare zip if both records have it, otherwise just ignore and assume it's the same // since by this time we've already compared parent hierarchies if( _.has(address1, 'zip') && _.has(address2, 'zip') ){ - if( isPropertyDifferent(address1, address2, 'zip') ){ return true; } + if( isZipDifferent(item1, item2) ){ return true; } } return false; @@ -255,10 +259,27 @@ function isDifferent(item1, item2, requestLanguage){ return false; } +/** + * return true if zip codes are different + * + * note: handle USA ZIP+4 vs ZIP (98036-6119 vs 98036) + */ +function isZipDifferent(item1, item2) { + let address1 = _.get(item1, 'address_parts'); + let address2 = _.get(item2, 'address_parts'); + + if (isUSA(item1) && isUSA(item2)) { + const firstWordOnly = (str) => normalizeString(str).split(' ')[0]; + return isPropertyDifferent(address1, address2, 'zip', firstWordOnly); + } + + return isPropertyDifferent(address1, address2, 'zip'); +} + /** * return true if properties are different */ -function isPropertyDifferent(item1, item2, prop ){ +function isPropertyDifferent(item1, item2, prop, normalizer = normalizeString ){ // if neither item has prop, we consider them the same if( !_.has(item1, prop) && !_.has(item2, prop) ){ return false; } @@ -274,7 +295,7 @@ function isPropertyDifferent(item1, item2, prop ){ let prop1StringValue = field.getStringValue( prop1[i] ); for( let j=0; j