Skip to content

Commit

Permalink
feat(peliasQuery): add peliasQueryAnalyzer as a replacement for analy…
Browse files Browse the repository at this point in the history
…zer_peliasQueryPartialToken, analyzer_peliasQueryFullToken and analyzer_peliasPhrase
  • Loading branch information
missinglink committed Jun 19, 2019
1 parent 4052b5e commit e97a012
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 1 deletion.
121 changes: 121 additions & 0 deletions integration/analyzer_peliasQuery.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// validate analyzer is behaving as expected

var tape = require('tape'),
elastictest = require('elastictest'),
schema = require('../schema'),
punctuation = require('../punctuation');

// Registry of test-case factories. Each entry is a function(test, common) and is
// invoked by module.exports.all at the bottom of this file.
module.exports.tests = {};

/**
 * Per-filter checks for the 'peliasQuery' analyzer.
 *
 * Every case is a [ label, input, expected tokens ] triple which is handed to
 * common.analyze (pre-bound to the suite, the tape handle and the analyzer name).
 *
 * @param {Function} test   - tape-style test registration function
 * @param {Object}   common - shared helpers; reads common.clientOpts and common.analyze
 */
module.exports.tests.analyze = function(test, common){
  test( 'analyze', function(t){

    var suite = new elastictest.Suite( common.clientOpts, { schema: schema } );
    var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQuery' );

    // wait for es to bring some shards up
    suite.action( function( done ){
      setTimeout( done, 500 );
    });

    // helper: register one [ label, input, expected ] triple
    var register = function( triple ){
      assertAnalysis( triple[0], triple[1], triple[2] );
    };

    // individual token filters
    [
      [ 'asciifolding', 'é', ['e'] ],
      [ 'asciifolding', 'ß', ['ss'] ],
      [ 'asciifolding', 'æ', ['ae'] ],
      [ 'asciifolding', 'ł', ['l'] ],
      [ 'asciifolding', 'ɰ', ['m'] ],
      [ 'lowercase', 'F', ['f'] ],
      [ 'trim', ' f ', ['f'] ],
      [ 'remove_ordinals', '26t', ['26'] ],
      [ 'remove_ordinals', '26th', ['26'] ],
      [ 'removeAllZeroNumericPrefix', '00001', ['1'] ],
      // repeated tokens are expected to survive here: all three '1's are returned
      [ 'unique', '1 1 1', ['1','1','1'] ],
      [ 'notnull', ' / / ', [] ]
    ].forEach( register );

    // no stemming is applied
    [
      [ 'no kstem', 'mcdonalds', ['mcdonalds'] ],
      [ 'no kstem', 'McDonald\'s', ['mcdonalds'] ],
      [ 'no kstem', 'peoples', ['peoples'] ]
    ].forEach( register );

    // remove punctuation (handled by the char_filter)
    var allPunctuation = punctuation.all.join('');
    assertAnalysis( 'punctuation', allPunctuation, ['-&'] );

    suite.run( t.end );
  });
};

/**
 * End-to-end checks of the 'peliasQuery' analyzer against realistic inputs
 * (a country name, a venue name with punctuation, a street address).
 *
 * @param {Function} test   - tape-style test registration function
 * @param {Object}   common - shared helpers; reads common.clientOpts and common.analyze
 */
module.exports.tests.functional = function(test, common){
  test( 'functional', function(t){

    var suite = new elastictest.Suite( common.clientOpts, { schema: schema } );
    var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQuery' );

    // wait for es to bring some shards up
    suite.action( function( done ){
      setTimeout( done, 500 );
    });

    // each row: label, raw input, expected tokens
    var scenarios = [
      { label: 'country', input: 'Trinidad and Tobago', tokens: [ 'trinidad', 'and', 'tobago' ] },
      { label: 'place',   input: 'Toys "R" Us!',        tokens: [ 'toys', 'r', 'us' ] },
      { label: 'address', input: '101 mapzen place',    tokens: [ '101', 'mapzen', 'place' ] }
    ];

    scenarios.forEach( function( scenario ){
      assertAnalysis( scenario.label, scenario.input, scenario.tokens );
    });

    suite.run( t.end );
  });
};

/**
 * Street-address checks for the 'peliasQuery' analyzer: house numbers,
 * single-letter directionals and alphanumeric unit numbers are all expected
 * to come back as individual lowercased tokens.
 *
 * @param {Function} test   - tape-style test registration function
 * @param {Object}   common - shared helpers; reads common.clientOpts and common.analyze
 */
module.exports.tests.address = function(test, common){
  test( 'address', function(t){

    var suite = new elastictest.Suite( common.clientOpts, { schema: schema } );
    var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQuery' );

    // wait for es to bring some shards up
    suite.action( function( done ){
      setTimeout( done, 500 );
    });

    // [ input, expected tokens ] pairs; all share the 'address' label
    var addresses = [
      [ '101 mapzen place', [ '101', 'mapzen', 'place' ] ],
      [ '30 w 26 st',       [ '30', 'w', '26', 'st' ] ],
      [ '4B 921 83 st',     [ '4b', '921', '83', 'st' ] ]
    ];

    for( var i = 0; i < addresses.length; i++ ){
      assertAnalysis( 'address', addresses[i][0], addresses[i][1] );
    }

    suite.run( t.end );
  });
};

/**
 * Unicode normalization checks for the 'peliasQuery' analyzer.
 *
 * The same word is submitted in its precomposed form (a single code point per
 * accented letter) and in its canonically decomposed form (base letter followed
 * by U+0301 COMBINING ACUTE ACCENT); both must analyze to the same token.
 *
 * @see: https://github.com/pelias/api/issues/600
 *
 * @param {Function} test   - tape-style test registration function
 * @param {Object}   common - shared helpers; reads common.clientOpts and common.analyze
 */
module.exports.tests.unicode = function(test, common){
  test( 'normalization', function(t){

    var suite = new elastictest.Suite( common.clientOpts, { schema: schema } );
    var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQuery' );
    suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

    var latin_large_letter_e_with_acute = String.fromCodePoint(0x00C9);
    var latin_small_letter_e_with_acute = String.fromCodePoint(0x00E9);
    var combining_acute_accent = String.fromCodePoint(0x0301);
    var latin_large_letter_e = String.fromCodePoint(0x0045);
    var latin_small_letter_e = String.fromCodePoint(0x0065);

    // A combining mark applies to the character that PRECEDES it, so the
    // decomposed spelling of an accented letter is base letter + combining mark.
    var decomposed_large_e_acute = latin_large_letter_e + combining_acute_accent;
    var decomposed_small_e_acute = latin_small_letter_e + combining_acute_accent;

    // Chambéry (both forms appear the same)
    var chamberyComposed = "Chamb" + latin_small_letter_e_with_acute + "ry";
    var chamberyDecomposed = "Chamb" + decomposed_small_e_acute + "ry";

    assertAnalysis( 'composed', chamberyComposed, ['chambery'] );
    assertAnalysis( 'decomposed', chamberyDecomposed, ['chambery'] );

    // Één (both forms appear the same)
    var eenComposed = latin_large_letter_e_with_acute + latin_small_letter_e_with_acute + "n";
    var eenDecomposed = decomposed_large_e_acute + decomposed_small_e_acute + "n";

    assertAnalysis( 'composed', eenComposed, ['een'] );
    assertAnalysis( 'decomposed', eenDecomposed, ['een'] );

    suite.run( t.end );
  });
};

/**
 * Entry point used by the test runner: registers every test case in
 * module.exports.tests, prefixing each test name with 'peliasQuery: '.
 *
 * @param {Function} tape   - test registration function, called as tape(name, testFunction)
 * @param {Object}   common - shared helpers, forwarded untouched to every test case
 */
module.exports.all = function (tape, common) {

  // wraps tape so that each test name carries the analyzer name
  var test = function (name, testFunction) {
    var prefixedName = 'peliasQuery: ' + name;
    return tape(prefixedName, testFunction);
  };

  var registry = module.exports.tests;
  Object.keys(registry).forEach(function (caseName) {
    registry[caseName](test, common);
  });
};
1 change: 1 addition & 0 deletions integration/run.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ var tests = [
require('./validate.js'),
require('./dynamic_templates.js'),
require('./analyzer_peliasIndexOneEdgeGram.js'),
require('./analyzer_peliasQuery.js'),
require('./analyzer_peliasQueryPartialToken.js'),
require('./analyzer_peliasQueryFullToken.js'),
require('./analyzer_peliasPhrase.js'),
Expand Down
14 changes: 14 additions & 0 deletions settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,20 @@ function generate(){
"notnull"
]
},
// 'peliasQuery' analyzer: added as a replacement for peliasQueryPartialToken,
// peliasQueryFullToken and peliasPhrase. Character filters run first
// (punctuation, then nfkc_normalizer), then the peliasNameTokenizer, then the
// token filters in the order listed below.
"peliasQuery": {
"type": "custom",
"tokenizer": "peliasNameTokenizer",
"char_filter": ["punctuation", "nfkc_normalizer"],
"filter": [
"icu_folding",
"lowercase",
"trim",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"unique_only_same_position",
"notnull"
]
},
"peliasQueryPartialToken" : {
"type": "custom",
"tokenizer" : "peliasNameTokenizer",
Expand Down
17 changes: 17 additions & 0 deletions test/fixtures/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,23 @@
"notnull"
]
},
"peliasQuery": {
"type": "custom",
"tokenizer": "peliasNameTokenizer",
"char_filter": [
"punctuation",
"nfkc_normalizer"
],
"filter": [
"icu_folding",
"lowercase",
"trim",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"unique_only_same_position",
"notnull"
]
},
"peliasQueryPartialToken": {
"type": "custom",
"tokenizer": "peliasNameTokenizer",
Expand Down
28 changes: 27 additions & 1 deletion test/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,32 @@ module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) {
});
};

/**
 * Unit tests for the generated 'peliasQuery' analyzer definition: first its
 * overall shape (type, tokenizer, char filters), then the exact ordered list
 * of token filters.
 *
 * @param {Function} test   - tape-style test registration function
 * @param {Object}   common - shared helpers (unused here)
 */
module.exports.tests.peliasQueryAnalyzer = function (test, common) {

  test('has peliasQuery analyzer', function (t) {
    var analyzers = settings().analysis.analyzer;
    t.equal(typeof analyzers.peliasQuery, 'object', 'there is a peliasQuery analyzer');

    var peliasQuery = analyzers.peliasQuery;
    var expectedCharFilters = ['punctuation', 'nfkc_normalizer'];

    t.equal(peliasQuery.type, 'custom', 'custom analyzer');
    t.equal(typeof peliasQuery.tokenizer, 'string', 'tokenizer specified');
    t.deepEqual(peliasQuery.char_filter, expectedCharFilters, 'character filters specified');
    t.true(Array.isArray(peliasQuery.filter), 'filters specified');
    t.end();
  });

  test('peliasQuery token filters', function (t) {
    // order matters: filters are applied in sequence
    var expectedFilters = [
      'icu_folding',
      'lowercase',
      'trim',
      'remove_ordinals',
      'removeAllZeroNumericPrefix',
      'unique_only_same_position',
      'notnull'
    ];
    var peliasQuery = settings().analysis.analyzer.peliasQuery;

    t.deepEqual(peliasQuery.filter, expectedFilters);
    t.end();
  });
};

module.exports.tests.peliasQueryFullTokenAnalyzer = function (test, common) {
test('has peliasQueryFullToken analyzer', function (t) {
var s = settings();
Expand Down Expand Up @@ -151,7 +177,7 @@ module.exports.tests.peliasQueryPartialTokenAnalyzer = function (test, common) {
"ampersand",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"unique",
"unique_only_same_position",
"notnull"
]);
t.end();
Expand Down

0 comments on commit e97a012

Please sign in to comment.