From 52d40690f8d490332f934cd698ba84612031b04d Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 19 Apr 2022 14:05:29 +0200 Subject: [PATCH] feat(fields): remove phrase field in Document in favor of duplicating name during serialization. --- Document.js | 9 +---- post/deduplication.js | 2 +- post/language_field_trimming.js | 60 ++++++++++++++-------------- test/document/name.js | 7 ---- test/post/deduplication.js | 16 -------- test/post/language_field_trimming.js | 23 ----------- test/serialize/test.js | 7 ---- 7 files changed, 31 insertions(+), 93 deletions(-) diff --git a/Document.js b/Document.js index aeaa685..46bee41 100644 --- a/Document.js +++ b/Document.js @@ -27,7 +27,6 @@ const parentFields = [ function Document( source, layer, source_id ){ this.name = {}; - this.phrase = {}; this.parent = {}; this.address_parts = {}; this.center_point = {}; @@ -93,7 +92,7 @@ Document.prototype.toESDocument = function() { var doc = { name: this.name, - phrase: this.phrase, + phrase: this.name, parent: this.parent, address_parts: this.address_parts, center_point: this.center_point, @@ -249,13 +248,10 @@ Document.prototype.setName = function( prop, value ){ validate.truthy(value); validate.regex.nomatch(value, /https?:\/\//); - // must copy name to 'phrase' index if( Array.isArray( this.name[ prop ] ) ){ this.name[ prop ][ 0 ] = value; - this.phrase[ prop ][ 0 ] = value; } else { this.name[ prop ] = value; - this.phrase[ prop ] = value; } return this; @@ -270,14 +266,12 @@ Document.prototype.setNameAlias = function( prop, value ){ // is this the first time setting this prop? ensure it's an array if( !this.hasName( prop ) ){ this.name[ prop ] = []; - this.phrase[ prop ] = []; } // is casting required to convert a scalar field to an array? else if( 'string' === typeof this.name[ prop ] ){ var stringValue = this.name[ prop ]; this.name[ prop ] = [ stringValue ]; - this.phrase[ prop ] = [ stringValue ]; } // is the array empty? ie. no prior call to setName() @@ -288,7 +282,6 @@ Document.prototype.setNameAlias = function( prop, value ){ // set the alias as the second, third, fourth, etc value in the array this.name[ prop ].push( value ); - this.phrase[ prop ].push( value ); return this; }; diff --git a/post/deduplication.js b/post/deduplication.js index 0e6b873..5f577fd 100644 --- a/post/deduplication.js +++ b/post/deduplication.js @@ -4,7 +4,7 @@ */ const _ = require('lodash'); -const prefixes = ['name', 'phrase', 'address_parts']; +const prefixes = ['name', 'address_parts']; const punctuation = /[\.]+/g; const normalize = (v) => _.isString(v) ? _.replace(v.toLowerCase(), punctuation, '') : v; diff --git a/post/language_field_trimming.js b/post/language_field_trimming.js index 5f0a8c9..4f0bbf1 100644 --- a/post/language_field_trimming.js +++ b/post/language_field_trimming.js @@ -15,50 +15,48 @@ */ const _ = require('lodash'); -const prefixes = ['name', 'phrase']; +const prefix = 'name'; function deduplication(doc) { - prefixes.forEach(prefix => { - // load the field data - // ie: an object keyed by language codes, each value is an array of names - let field = doc[prefix]; - if (!_.isPlainObject(field)) { return; } + // load the field data + // ie: an object keyed by language codes, each value is an array of names + let field = doc[prefix]; + if (!_.isPlainObject(field)) { return; } - // fetch the 'default' language - var defaults = _.get(field, 'default'); + // fetch the 'default' language + var defaults = _.get(field, 'default'); - // no default names, nothing to do; continue - if (_.isEmpty(defaults)) { return; } + // no default names, nothing to do; continue + if (_.isEmpty(defaults)) { return; } - // convert scalar values to arrays - defaults = _.castArray(defaults); + // convert scalar values to arrays + defaults = _.castArray(defaults); - // iterate over other languages in the field - _.each(field, (names, lang) => { + // iterate over other languages in the field + _.each(field, (names, lang) => { - // skip the 'default' language - if (lang === 'default'){ return; } + // skip the 'default' language + if (lang === 'default'){ return; } - // no names, nothing to do; continue - if (_.isEmpty(names)) { return; } + // no names, nothing to do; continue + if (_.isEmpty(names)) { return; } - // convert scalar values to arrays - names = _.castArray(names); + // convert scalar values to arrays + names = _.castArray(names); - // filter entries from this language which appear in the 'default' lang - field[lang] = _.difference(names, defaults); + // filter entries from this language which appear in the 'default' lang + field[lang] = _.difference(names, defaults); - // clean up empty language arrays - if (_.isEmpty(field[lang])) { - delete field[lang]; - } + // clean up empty language arrays + if (_.isEmpty(field[lang])) { + delete field[lang]; + } - // flatten single-value arrays - else if(_.size(field[lang]) === 1) { - field[lang] = _.first(field[lang]); - } - }); + // flatten single-value arrays + else if(_.size(field[lang]) === 1) { + field[lang] = _.first(field[lang]); + } }); } diff --git a/test/document/name.js b/test/document/name.js index 71585a3..ddddfc3 100644 --- a/test/document/name.js +++ b/test/document/name.js @@ -18,7 +18,6 @@ module.exports.tests.setName = function(test) { var doc = new Document('mysource','mylayer','myid'); t.equal(doc.setName('foo','bar'), doc, 'chainable'); t.equal(doc.name.foo, 'bar', 'setter works'); - t.equal(doc.phrase.foo, 'bar', 'setter works'); t.end(); }); test('setName - validate key', function(t) { @@ -70,9 +69,6 @@ module.exports.tests.setNameAlias = function(test) { t.equal(doc.name.foo[0], 'bar', 'setter works'); t.equal(doc.name.foo[1], 'bar', 'setter works'); t.equal(doc.name.foo[2], 'baz', 'setter works'); - t.equal(doc.phrase.foo[0], 'bar', 'setter works'); - t.equal(doc.phrase.foo[1], 'bar', 'setter works'); - t.equal(doc.phrase.foo[2], 'baz', 'setter works'); t.equal(doc.getName('foo'), 'bar', 'name set'); t.deepEqual(doc.getNameAliases('foo'), ['bar','baz'], 'aliases set'); t.end(); @@ -85,9 +81,6 @@ module.exports.tests.setNameAlias = function(test) { t.equal(doc.name.foo[0], 'bar', 'setter works'); t.equal(doc.name.foo[1], 'baz', 'setter works'); t.equal(doc.name.foo[2], 'boo', 'setter works'); - t.equal(doc.phrase.foo[0], 'bar', 'setter works'); - t.equal(doc.phrase.foo[1], 'baz', 'setter works'); - t.equal(doc.phrase.foo[2], 'boo', 'setter works'); t.equal(doc.getName('foo'), 'bar', 'name set'); t.deepEqual(doc.getNameAliases('foo'), ['baz','boo'], 'aliases set'); t.end(); diff --git a/test/post/deduplication.js b/test/post/deduplication.js index b1f9021..d0bce29 100644 --- a/test/post/deduplication.js +++ b/test/post/deduplication.js @@ -35,22 +35,6 @@ module.exports.tests.dedupe = function (test) { t.end(); }); - - test('dedupe - phrase', function (t) { - var doc = new Document('mysource', 'mylayer', 'myid'); - - doc.setNameAlias('default', 'test'); - doc.setName('default', 'test'); - doc.setNameAlias('default', 'test'); - doc.setNameAlias('default', 'test 2'); - doc.setNameAlias('default', 'test'); - doc.setNameAlias('default', '...Te...st...'); - - deduplication(doc); - t.deepEquals(doc.phrase.default, ['test', 'test 2']); - - t.end(); - }); }; module.exports.all = function (tape, common) { diff --git a/test/post/language_field_trimming.js b/test/post/language_field_trimming.js index 4b3119e..bb9c7ac 100644 --- a/test/post/language_field_trimming.js +++ b/test/post/language_field_trimming.js @@ -28,29 +28,6 @@ module.exports.tests.dedupe = function (test) { t.end(); }); - test('dedupe - phrase', function (t) { - var doc = new Document('mysource', 'mylayer', 'myid'); - - doc.setName('default', 'test1'); - doc.setNameAlias('default', 'test2'); - doc.setNameAlias('default', 'test3'); - - doc.setName('en', 'test1'); - doc.setNameAlias('en', 'test3'); - doc.setNameAlias('en', 'test4'); - - doc.setName('de', 'test1'); - doc.setNameAlias('de', 'test2'); - - language_field_trimming(doc); - - t.deepEquals(doc.phrase.default, ['test1', 'test2', 'test3']); - t.deepEquals(doc.phrase.en, 'test4'); - t.false(doc.phrase.de); - - t.end(); - }); - test('dedupe - two default names, one from a language code', function (t) { var doc = new Document('mysource', 'mylayer', 'myid'); diff --git a/test/serialize/test.js b/test/serialize/test.js index ca2085c..cd7ee21 100644 --- a/test/serialize/test.js +++ b/test/serialize/test.js @@ -27,7 +27,6 @@ module.exports.tests.minimal = function(test) { 'layer': 'mylayer', 'source_id': 'myid', 'name': {}, - 'phrase': {}, 'parent': {}, 'address_parts': {}, 'category': [], @@ -82,12 +81,6 @@ module.exports.tests.complete = function(test) { 'alt': 'Haggerston City Farm' }, - // place name (phrase analysis) - 'phrase':{ - 'default': 'Hackney City Farm', - 'alt': 'Haggerston City Farm' - }, - // address data 'address_parts':{ 'number': '10',