Skip to content

Commit

Permalink
Added separate methods for Levenshtein with search (#594)
Browse files Browse the repository at this point in the history
* Added separate methods for Levenshtein with search

* Added some spaces (coding standard)
  • Loading branch information
Hugo-ter-Doest authored Mar 26, 2021
1 parent dbccc24 commit d748c25
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 9 deletions.
2 changes: 2 additions & 0 deletions lib/natural/distance/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,7 @@ THE SOFTWARE.
// Public distance API. Each export is forwarded from the module that implements it.
exports.JaroWinklerDistance = require('./jaro-winkler_distance')

// All four Levenshtein entry points live in one module, so load it once and
// pick the individual functions off the result.
const levenshteinModule = require('./levenshtein_distance')
exports.LevenshteinDistance = levenshteinModule.LevenshteinDistance
exports.DamerauLevenshteinDistance = levenshteinModule.DamerauLevenshteinDistance
exports.LevenshteinDistanceSearch = levenshteinModule.LevenshteinDistanceSearch
exports.DamerauLevenshteinDistanceSearch = levenshteinModule.DamerauLevenshteinDistanceSearch

exports.DiceCoefficient = require('./dice_coefficient')
exports.HammingDistance = require('./hamming_distance')
22 changes: 19 additions & 3 deletions lib/natural/distance/levenshtein_distance.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,27 @@ function DamerauLevenshteinDistance (source, target, options) {
const damLevOptions = _.extend(
{ transposition_cost: 1, restricted: false },
options || {},
{ damerau: true }
{ damerau: true, search: false }
)
return levenshteinDistance(source, target, damLevOptions)
}

/**
 * Runs levenshteinDistance with both the `damerau` and `search` options
 * forced to true.
 *
 * Options are layered in three steps: built-in defaults
 * (`transposition_cost: 1`, `restricted: false`), then whatever the caller
 * supplied, then the forced flags. A caller therefore cannot switch
 * `damerau` or `search` off through `options`.
 *
 * @param {*} source - first value to compare; passed through unchanged
 *   (presumably a string, as in the spec for the non-Damerau variant)
 * @param {*} target - second value to compare; passed through unchanged
 * @param {Object} [options] - extra options for levenshteinDistance; any
 *   falsy value is treated as "no options"
 * @returns {*} whatever levenshteinDistance returns for the merged options
 */
function DamerauLevenshteinDistanceSearch (source, target, options) {
  const defaults = { transposition_cost: 1, restricted: false }
  const callerOptions = options || {}
  const forced = { damerau: true, search: true }

  // `defaults` is a fresh literal, so extending it in place never touches
  // the caller's `options` object.
  const mergedOptions = _.extend(defaults, callerOptions, forced)
  return levenshteinDistance(source, target, mergedOptions)
}

/**
 * Runs levenshteinDistance with `search` forced to true and `damerau`
 * forced to false.
 *
 * The caller's options are copied into a new object before the forced flags
 * are applied, so the object passed in is left untouched and cannot
 * override either flag. In the spec, calls to this function yield objects
 * of the form `{ substring, distance, offset }`.
 *
 * @param {*} source - first value to compare; passed through unchanged
 *   (strings in the spec)
 * @param {*} target - second value to compare; passed through unchanged
 * @param {Object} [options] - extra options for levenshteinDistance (the
 *   spec uses `insertion_cost` and `deletion_cost`); any falsy value is
 *   treated as "no options"
 * @returns {*} whatever levenshteinDistance returns for the merged options
 */
function LevenshteinDistanceSearch (source, target, options) {
  const forced = { damerau: false, search: true }
  const mergedOptions = _.extend({}, options || {}, forced)
  return levenshteinDistance(source, target, mergedOptions)
}

/**
 * Runs levenshteinDistance with the `damerau` option forced to false.
 *
 * The caller's options are copied into a fresh object before the forced
 * flags are applied, so the object passed in is not modified.
 *
 * @param {*} source - first value to compare; passed through unchanged
 * @param {*} target - second value to compare; passed through unchanged
 * @param {Object} [options] - extra options for levenshteinDistance; any
 *   falsy value is treated as "no options"
 * @returns {*} whatever levenshteinDistance returns for the merged options
 */
function LevenshteinDistance (source, target, options) {
// NOTE(review): the two consecutive `levOptions` declarations look like the
// removed and added sides of this diff hunk shown without +/- markers. The
// second one, which additionally forces `search: false`, appears to be the
// new revision - confirm against the repository before relying on either.
const levOptions = _.extend({}, options || {}, { damerau: false })
const levOptions = _.extend({}, options || {}, { damerau: false, search: false })
return levenshteinDistance(source, target, levOptions)
}

Expand Down Expand Up @@ -221,5 +235,7 @@ function levenshteinDistance (source, target, options) {

// Entry points exposed by this module. The two *Search functions call the
// shared levenshteinDistance routine with `search: true` forced on.
// NOTE(review): the first `DamerauLevenshteinDistance` entry (no trailing
// comma) looks like the removed side of this diff hunk shown without +/-
// markers; the three entries after it appear to be the added side.
module.exports = {
LevenshteinDistance: LevenshteinDistance,
DamerauLevenshteinDistance: DamerauLevenshteinDistance
LevenshteinDistanceSearch: LevenshteinDistanceSearch,
DamerauLevenshteinDistance: DamerauLevenshteinDistance,
DamerauLevenshteinDistanceSearch: DamerauLevenshteinDistanceSearch
}
14 changes: 8 additions & 6 deletions spec/levenshtein_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,40 @@ THE SOFTWARE.

const levenshteinDistance =
require('../lib/natural/distance/levenshtein_distance').LevenshteinDistance
const levenshteinDistanceSearch =
require('../lib/natural/distance/levenshtein_distance').LevenshteinDistanceSearch

describe('levenshtein_distance', function () {
describe('options.search = true', function () {
it('should find cheapest substring', function () {
expect(levenshteinDistance('kitten', 'sitting', { search: true }))
expect(levenshteinDistanceSearch('kitten', 'sitting'))
.toEqual({ substring: 'sittin', distance: 2, offset: 0 })
})

it('should find 0 cost substring in target', function () {
expect(levenshteinDistance('doctor', 'the doctor is in', { search: true }))
expect(levenshteinDistanceSearch('doctor', 'the doctor is in'))
.toEqual({ substring: 'doctor', distance: 0, offset: 4 })
})

it('should find 1 cost substring in target', function () {
expect(levenshteinDistance('doctor', 'the doktor is in', { search: true }))
expect(levenshteinDistanceSearch('doctor', 'the doktor is in'))
.toEqual({ substring: 'doktor', distance: 1, offset: 4 })
})

it('should return empty substring when that is cleapest match', function () {
expect(levenshteinDistance('doctor', '000000000000', { search: true }))
expect(levenshteinDistanceSearch('doctor', '000000000000'))
.toEqual({ substring: '', distance: 6, offset: 0 })
})

it('different insertion costs should work', function () {
// delete 10 0's at cost 1 and insert the letters for doctor at cost -1
expect(levenshteinDistance('0000000000', 'doctor', { search: true, insertion_cost: -1 }))
expect(levenshteinDistanceSearch('0000000000', 'doctor', { insertion_cost: -1 }))
.toEqual({ substring: 'doctor', distance: 4, offset: 0 })
})

it('different deletion costs should work', function () {
// delete 10 0's at cost -1 each, for a total distance of -10
expect(levenshteinDistance('0000000000', 'doctor', { search: true, deletion_cost: -1 }))
expect(levenshteinDistanceSearch('0000000000', 'doctor', { deletion_cost: -1 }))
.toEqual({ substring: '', distance: -10, offset: 0 })
})
})
Expand Down

0 comments on commit d748c25

Please sign in to comment.