Skip to content

Commit

Permalink
Merge pull request #359 from pelias/remove_full_token_address_suffix_…
Browse files Browse the repository at this point in the history
…expansion

remove full_token_address_suffix_expansion
  • Loading branch information
orangejulius authored Jun 3, 2019
2 parents 908e4bd + 0e14823 commit c408b38
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 253 deletions.
20 changes: 10 additions & 10 deletions integration/analyzer_peliasIndexOneEdgeGram.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ module.exports.tests.analyze = function(test, common){
assertAnalysis( 'ampersand', 'a and & and b', ['a','&','b'] );
assertAnalysis( 'ampersand', 'land', ['l','la','lan','land'] ); // should not replace inside tokens

// full_token_address_suffix_expansion
assertAnalysis( 'full_token_address_suffix_expansion', 'rd', ['r','ro','roa','road'] );
assertAnalysis( 'full_token_address_suffix_expansion', 'ctr', ['c','ce','cen','cent','cente','center'] );
// keyword_street_suffix
assertAnalysis( 'keyword_street_suffix', 'rd', ['r','rd','ro','roa','road'] );
assertAnalysis( 'keyword_street_suffix', 'ctr', ['c', 'ct', 'ctr', 'ce', 'cen', 'cent', 'cente', 'center'] );

assertAnalysis( 'peliasIndexOneEdgeGramFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcd','abcde','abcdef','abcdefg','abcdefgh','abcdefghi','abcdefghij'] );
assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] );
Expand Down Expand Up @@ -71,19 +71,19 @@ module.exports.tests.address_suffix_expansions = function(test, common){
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'safe expansions', 'aly', [
'a', 'al', 'all', 'alle', 'alley'
'a', 'al', 'aly', 'all', 'alle', 'alley'
]);

assertAnalysis( 'safe expansions', 'xing', [
'c', 'cr', 'cro', 'cros', 'cross', 'crossi', 'crossin', 'crossing'
'x', 'xi', 'xin', 'xing', 'c', 'cr', 'cro', 'cros', 'cross', 'crossi', 'crossin', 'crossing'
]);

assertAnalysis( 'safe expansions', 'rd', [
'r', 'ro', 'roa', 'road'
'r', 'rd', 'ro', 'roa', 'road'
]);

assertAnalysis( 'unsafe expansion', 'ct st', [
'c', 'ct', 's', 'st'
'c', 'ct', 'co', 'cou', 'cour', 'court', 's', 'st', 'str', 'stre', 'stree', 'street'
]);

suite.run( t.end );
Expand All @@ -103,7 +103,7 @@ module.exports.tests.stop_words = function(test, common){
]);

assertAnalysis( 'street suffix (abbreviation)', 'AB st', [
'a', 'ab', 's', 'st'
'a', 'ab', 's', 'st', 'str', 'stre', 'stree', 'street'
]);

suite.run( t.end );
Expand Down Expand Up @@ -145,11 +145,11 @@ module.exports.tests.address = function(test, common){
]);

assertAnalysis( 'address', '30 w 26 st', [
'30', 'w', 'we', 'wes', 'west', '26', 's', 'st'
'30', 'w', 'we', 'wes', 'west', '26', 's', 'st', 'str', 'stre', 'stree', 'street'
]);

assertAnalysis( 'address', '4B 921 83 st', [
'4b', '921', '83', 's', 'st'
'4b', '921', '83', 's', 'st', 'str', 'stre', 'stree', 'street'
]);

suite.run( t.end );
Expand Down
55 changes: 28 additions & 27 deletions integration/analyzer_peliasQueryFullToken.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@ module.exports.tests.analyze = function(test, common){
assertAnalysis( 'ampersand', 'a and & and b', ['a','&','b'] );
assertAnalysis( 'ampersand', 'land', ['land'] ); // should not replace inside tokens

// full_token_address_suffix_expansion
assertAnalysis( 'full_token_address_suffix_expansion', 'rd', ['road'] );
assertAnalysis( 'full_token_address_suffix_expansion', 'ctr', ['center'] );
assertAnalysis( 'keyword_street_suffix', 'foo Street', ['foo', 'street', 'st'], true );
assertAnalysis( 'keyword_street_suffix', 'foo Road', ['foo', 'road', 'rd'], true );
assertAnalysis( 'keyword_street_suffix', 'foo Crescent', ['foo', 'crescent', 'cres'], true );
assertAnalysis( 'keyword_compass', 'north foo', ['north', 'n', 'foo'], true );
assertAnalysis( 'keyword_compass', 'SouthWest foo', ['southwest', 'sw', 'foo'], true );
assertAnalysis( 'keyword_compass', 'foo SouthWest', ['foo', 'southwest', 'sw'], true );

assertAnalysis( 'peliasQueryFullTokenFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcdefghij'] );
assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] );
Expand All @@ -49,22 +52,20 @@ module.exports.tests.analyze = function(test, common){
});
};

// address suffix expansions should only be performed in a way that is
// safe for 'partial tokens'.
module.exports.tests.address_suffix_expansions = function(test, common){
test( 'address suffix expansions', function(t){

var suite = new elastictest.Suite( common.clientOpts, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasQueryFullToken' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'safe expansions', 'aly', [ 'alley' ]);
assertAnalysis( 'safe expansions', 'aly', [ 'aly', 'alley' ]);

assertAnalysis( 'safe expansions', 'xing', [ 'crossing' ]);
assertAnalysis( 'safe expansions', 'xing', [ 'xing', 'crossing' ]);

assertAnalysis( 'safe expansions', 'rd', [ 'road' ]);
assertAnalysis( 'safe expansions', 'rd', [ 'rd', 'road' ]);

assertAnalysis( 'unsafe expansion', 'ct st', [ 'ct', 'st' ]);
assertAnalysis( 'safe expansion', 'ct st', [ 'ct', 'court', 'st', 'street' ]);

suite.run( t.end );
});
Expand All @@ -78,9 +79,9 @@ module.exports.tests.stop_words = function(test, common){
var assertAnalysis = analyze.bind( null, suite, t, 'peliasQueryFullToken' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'street suffix', 'AB street', [ 'ab', 'street' ]);
assertAnalysis( 'street suffix', 'AB street', [ 'ab', 'street', 'st' ]);

assertAnalysis( 'street suffix (abbreviation)', 'AB st', [ 'ab', 'st' ]);
assertAnalysis( 'street suffix (abbreviation)', 'AB st', [ 'ab', 'st', 'street' ]);

suite.run( t.end );
});
Expand All @@ -102,7 +103,7 @@ module.exports.tests.functional = function(test, common){
]);

assertAnalysis( 'address', '101 mapzen place', [
'101', 'mapzen', 'place'
'101', 'mapzen', 'place', 'pl'
]);

suite.run( t.end );
Expand All @@ -117,15 +118,15 @@ module.exports.tests.tokenizer = function(test, common){
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

// specify 2 streets with a delimiter
assertAnalysis( 'forward slash', 'Bedell Street/133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'forward slash', 'Bedell Street /133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'forward slash', 'Bedell Street/ 133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'back slash', 'Bedell Street\\133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'back slash', 'Bedell Street \\133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'back slash', 'Bedell Street\\ 133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'comma', 'Bedell Street,133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'comma', 'Bedell Street ,133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'comma', 'Bedell Street, 133rd Avenue', [ 'bedell', 'street', '133', 'avenue' ]);
assertAnalysis( 'forward slash', 'Bedell Street/133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'forward slash', 'Bedell Street /133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'forward slash', 'Bedell Street/ 133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'back slash', 'Bedell Street\\133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'back slash', 'Bedell Street \\133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'back slash', 'Bedell Street\\ 133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'comma', 'Bedell Street,133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'comma', 'Bedell Street ,133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);
assertAnalysis( 'comma', 'Bedell Street, 133rd Avenue', [ 'bedell', 'street', 'st', '133', 'avenue', 'ave', 'av' ]);

suite.run( t.end );
});
Expand Down Expand Up @@ -183,15 +184,15 @@ module.exports.tests.address = function(test, common){
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'address', '101 mapzen place', [
'101', 'mapzen', 'place'
'101', 'mapzen', 'place', 'pl'
]);

assertAnalysis( 'address', '30 w 26 st', [
'30', 'west', '26', 'st'
'30', 'w', 'west', '26', 'st', 'street'
]);

assertAnalysis( 'address', '4B 921 83 st', [
'4b', '921', '83', 'st'
'4b', '921', '83', 'st', 'street'
]);

suite.run( t.end );
Expand Down Expand Up @@ -241,15 +242,15 @@ module.exports.all = function (tape, common) {
}
};

function analyze( suite, t, analyzer, comment, text, expected ){
function analyze( suite, t, analyzer, comment, text, expected, includePosition ){
suite.assert( function( done ){
suite.client.indices.analyze({
index: suite.props.index,
analyzer: analyzer,
text: text
}, function( err, res ){
if( err ) console.error( err );
t.deepEqual( simpleTokens( res.tokens ), expected, comment );
if( err ){ console.error( err ); }
t.deepEqual( simpleTokens( res.tokens, includePosition ), expected, comment );
done();
});
});
Expand Down
6 changes: 4 additions & 2 deletions settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ function generate(){
"icu_folding",
"trim",
"custom_name",
"full_token_address_suffix_expansion",
"street_suffix",
"directionals",
"ampersand",
"remove_ordinals",
"removeAllZeroNumericPrefix",
Expand Down Expand Up @@ -99,7 +100,8 @@ function generate(){
"icu_folding",
"trim",
"remove_ordinals",
"full_token_address_suffix_expansion",
"street_suffix",
"directionals",
"ampersand",
"removeAllZeroNumericPrefix",
"unique",
Expand Down
106 changes: 0 additions & 106 deletions synonyms/full_token_address_suffix_expansion.txt

This file was deleted.

Loading

0 comments on commit c408b38

Please sign in to comment.