/**
 * Classifier which attempts to classify street names with no suffix or prefix
 * when accompanied by a housenumber in the same section.
 *
 * A section is only acted upon when it has exactly two children, the last
 * child already carries a HouseNumberClassification and the first child has
 * no public classification. In that case the first child receives
 * `new StreetClassification(0.5)`.
 *
 * see: https://github.com/pelias/parser/issues/83
 */
class CentralEuropeanStreetNameClassifier extends SectionClassifier {
  /**
   * Inspect one section and, when it matches the pattern described on the
   * class, classify its first child as a street.
   *
   * @param {object} section - section whose `graph` answers
   *   `length('child')` and `findAll('child')`; each child exposes a
   *   `classifications` map and a `classify()` method
   * @returns {undefined} nothing is returned; the first child is classified in place
   */
  each (section) {
    // there must be exactly two children in this section
    // note: we may wish to relax/expand on this later
    if (section.graph.length('child') !== 2) { return }

    // get first and last child
    const children = section.graph.findAll('child')
    const first = _.first(children)
    const last = _.last(children)

    // section must end with a HouseNumberClassification
    // note: hasOwnProperty is borrowed from Object.prototype so the check still
    // works when the classifications map has no prototype or shadows the method
    if (!Object.prototype.hasOwnProperty.call(last.classifications, 'HouseNumberClassification')) { return }

    // other elements cannot contain any public classifications
    if (_.some(first.classifications, (c) => c.public)) { return }

    // assume the first token is a street name
    first.classify(new StreetClassification(0.5))
  }
}
module.exports.tests = {}

/**
 * Registers one test per fixture section. Every fixture has two children: an
 * unclassified token followed by a token carrying a HouseNumberClassification.
 * After running the classifier the first child must hold only a
 * StreetClassification(0.5) and the last child must be unchanged.
 */
module.exports.tests.classify = (test) => {
  const sections = [
    new Span('Foo 1').setChildren([
      new Span('Foo'),
      new Span('1').classify(new HouseNumberClassification(1.0))
    ]),
    new Span('Bar 2137').setChildren([
      new Span('Bar'),
      new Span('2137').classify(new HouseNumberClassification(1.0))
    ])
  ]

  for (const section of sections) {
    test(`classify: ${section.body}`, (t) => {
      // run the classifier against the fixture section
      classifier.each(section, null, 1)

      // collect the child tokens once
      const tokens = section.graph.findAll('child')
      const streetToken = _.first(tokens)
      const numberToken = _.last(tokens)

      // the leading token is now a street
      t.deepEqual(streetToken.classifications, {
        StreetClassification: new StreetClassification(0.5)
      })

      // the trailing token keeps its housenumber classification only
      t.deepEqual(numberToken.classifications, {
        HouseNumberClassification: new HouseNumberClassification(1)
      })

      t.end()
    })
  }
}

/**
 * Runs every registered test group, prefixing each test name with the
 * classifier name.
 */
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`CentralEuropeanStreetNameClassifier: ${name}`, testFunction)

  const groups = module.exports.tests
  for (const groupName in groups) {
    groups[groupName](test, common)
  }
}
/**
 * Czech address test cases.
 *
 * `common.assert(test)` yields a helper which is called with an input string
 * and the list of labelled tokens expected for that input.
 */
const testcase = (test, common) => {
  const assert = common.assert(test)

  assert('Korunní 810, Praha', [
    { street: 'Korunní' }, { housenumber: '810' },
    { locality: 'Praha' }
  ])

  assert('Kájovská 68, Český Krumlov', [
    { street: 'Kájovská' }, { housenumber: '68' },
    { locality: 'Český Krumlov' }
  ])

  assert('Beethovenova 641/9, Brno', [
    { street: 'Beethovenova' }, { housenumber: '641/9' },
    { locality: 'Brno' }
  ])
}

/**
 * Entry point used by the test runner: wraps `tape` so every test name is
 * prefixed with the suite label, then runs the test cases.
 */
module.exports.all = (tape, common) => {
  function test (name, testFunction) {
    // label uses the bare country code, matching the HRV/POL/SVK suites
    return tape(`address CZE: ${name}`, testFunction)
  }

  testcase(test, common)
}
/**
 * Polish address test cases: each call pairs an input string with the
 * labelled tokens expected for it.
 */
const testcase = (test, common) => {
  const expectParse = common.assert(test)

  const expected = [
    { street: 'Szewska' }, { housenumber: '6' },
    { locality: 'Kraków' }
  ]
  expectParse('Szewska 6, Kraków', expected)
}

/**
 * Entry point used by the test runner: prefixes test names with the suite
 * label and runs the test cases.
 */
module.exports.all = (tape, common) => {
  const test = (name, testFunction) => tape(`address POL: ${name}`, testFunction)

  testcase(test, common)
}